
Compare revisions

Commits on Source (413)
Showing
with 1455 additions and 111 deletions
......@@ -63,80 +63,94 @@ fi
cd ${builddir}
# prepare cover
#link_pdf cover cover.pdf
#link_pdf experiment experiment.pdf
#link_pdf datacenter datacenter.pdf
#link_pdf research research.pdf
#link_pdf transfer transfer.pdf
#link_pdf additional additional.pdf
link_pdf cover cover.pdf
link_pdf experiment experiment.pdf
link_pdf datacenter datacenter.pdf
link_pdf research research.pdf
link_pdf transfer transfer.pdf
link_pdf additional additional.pdf
build_from_source user-support main.tex *.PNG
build_from_source ams AMS-report-2019.tex AMS_nuovo.pdf contributors.pdf He-MC.pdf input_output.jpg production_jobs.jpg
#build_from_source alice alice.tex *.png *.eps
#build_from_source atlas atlas.tex
#build_from_source borexino borexino.tex
build_from_source alice main.tex *.png
build_from_source atlas atlas.tex
build_from_source borexino Borexino_CNAFreport2018.tex
build_from_source cms report-cms-feb-2019.tex tier1-jobs-2018.pdf tier1-readiness-2018.pdf
link_pdf belle Cnaf-2019-5.0.pdf
#build_from_source cosa cosa.tex biblio.bib beegfs.PNG
#build_from_source cnprov cnprov.tex
#build_from_source cta cta.tex *.eps
#build_from_source cuore cnaf_cuore.tex cnaf_cuore.bib
#build_from_source cupid cupid.tex cupid.bib
#link_pdf dampe dampe.pdf
#link_pdf darkside ds.pdf
build_from_source cnprov cnprov.tex
build_from_source cta CTA_annualreport_2018_v1.tex *.eps
build_from_source cuore cuore.tex cuore.bib
build_from_source cupid main.tex cupid-biblio.bib
build_from_source dampe main.tex *.jpg *.png
build_from_source darkside ds-annual-report-2019.tex
#build_from_source eee eee.tex EEEarch.eps EEEmonitor.eps EEEtracks.png ELOGquery.png request.png
#build_from_source exanest exanest.tex biblio.bib monitoring.PNG storage.png
build_from_source test TEST.tex test.eps
#build_from_source fazia fazia.tex
build_from_source fermi fermi.tex
build_from_source gamma gamma.tex
build_from_source icarus report_2018.tex *.png
#build_from_source gerda gerda.tex *.pdf
#build_from_source glast glast.tex
#link_pdf juno juno.pdf
link_pdf juno juno-annual-report-2019.pdf
build_from_source km3net km3net.tex compmodel.png threetier.png
build_from_source na62 main.tex
build_from_source newchim repnewchim18.tex fig1.png
#build_from_source lhcb lhcb.tex *.jpg
#build_from_source lhcf lhcf.tex
build_from_source lhcb lhcb.tex *.png
build_from_source lhcf lhcf.tex
build_from_source limadou limadou.tex
#build_from_source lowcostdev lowcostdev.tex *.jpg
#build_from_source lspe lspe.tex biblio.bib lspe_data_path.pdf
build_from_source virgo AdV_computing_CNAF.tex
build_from_source xenon main.tex xenon-computing-model.pdf
build_from_source sc18 SC18.tex *.png
#build_from_source mw-esaco mw-esaco.tex *.png
#build_from_source mw-kube mw-kube.tex
#build_from_source mw-cdmi-storm mw-cdmi-storm.tex *.png *.jpeg
#build_from_source mw-software mw-software.tex
#build_from_source mw-iam mw-iam.tex
## Research and Developments
build_from_source sd_iam main.tex biblio.bib *.png
build_from_source sd_storm main.tex biblio.bib *.png
build_from_source sd_storm2 main.tex biblio.bib *.png
build_from_source sd_nginx_voms main.tex biblio.bib *.png
#build_from_source na62 na62.tex
#link_pdf padme padme.pdf
link_pdf padme 2019_PADMEcontribution.pdf
#build_from_source xenon xenon.tex xenon-computing-model.pdf
#build_from_source sysinfo sysinfo.tex pres_rundeck.png deploy_grafana.png
build_from_source sysinfo sysinfo.tex *.png
#link_pdf virgo VirgoComputing.pdf
#build_from_source tier1 tier1.tex
build_from_source tier1 tier1.tex *.png
#build_from_source flood theflood.tex *.png
#build_from_source farming farming.tex
build_from_source HTC_testbed HTC_testbed_AR2018.tex
build_from_source farming ARFarming2018.tex *.png *.jpg
#build_from_source dynfarm dynfarm.tex
#build_from_source storage storage.tex *.png Huawei_rack.JPG
build_from_source storage storage.tex *.PNG
#build_from_source seagate seagate.tex biblio.bib *.png *.jpg
#build_from_source dataclient dataclient.tex
#build_from_source ltpd ltpd.tex *.png
#build_from_source net net.tex *.png
build_from_source net main.tex *.png
#build_from_source ssnn1 ssnn.tex *.jpg
#build_from_source ssnn2 vmware.tex *.JPG *.jpg
#build_from_source infra Chiller.tex chiller-location.png
build_from_source audit Audit-2018.tex image.png
#build_from_source cloud_cnaf cloud_cnaf.tex *.png
#build_from_source srp SoftRel.tex ar2017.bib
build_from_source dmsq dmsq2018.tex ar2018.bib
#build_from_source st StatMet.tex sm2017.bib
#build_from_source cloud_a cloud_a.tex *.pdf
build_from_source ds_eoscpilot ds_eoscpilot.tex *.png
build_from_source ds_eoschub ds_eoschub.tex *.png
build_from_source ds_cloud_c ds_cloud_c.tex *.png
build_from_source ds_infn_cc ds_infn_cc.tex *.png
build_from_source ds_devops_pe ds_devops_pe.tex *.png
#build_from_source cloud_b cloud_b.tex *.png *.jpg
#build_from_source cloud_c cloud_c.tex *.png *.pdf
#build_from_source cloud_d cloud_d.tex *.png
build_from_source sdds-xdc SDDS-XDC.tex *.png
build_from_source sdds-deep SDDS-DEEP.tex *.png
build_from_source PhD_DataScience_2018 PhD-DataScience-2018.tex
build_from_source chnet dhlab.tex *.png
#build_from_source pett pett.tex bibliopett.bib
#build_from_source iso iso.tex 27001.png biblioiso.bib
build_from_source pett pett.tex bibliopett.bib
build_from_source summerstudent summerstudent.tex *.png
pdflatex ${topdir}/cnaf-annual-report-2018.tex \
&& pdflatex ${topdir}/cnaf-annual-report-2018.tex 2> /dev/null \
......
......@@ -28,7 +28,7 @@
%\author{}
%\maketitle
%\includepdf[pages=1, pagecommand={\thispagestyle{empty}}]{papers/cover.pdf}
\includepdf[pages=1, pagecommand={\thispagestyle{empty}}]{papers/cover.pdf}
\newpage
\thispagestyle{empty}
......@@ -82,7 +82,46 @@ Tel. +39 051 209 5475, Fax +39 051 209 5477\\
\markboth{\MakeUppercase{Introduction}}{\MakeUppercase{Introduction}}
\chapter*{Introduction}
\thispagestyle{plain}
Introducing the sixth annual report of CNAF...
\small The first months of 2018 were still affected by the flooding of November 2017, and it was only in March 2018
that our data center was able to resume its full activity.
Despite this, the overall performance of the Tier 1 for the LHC experiments and for the many other astroparticle and nuclear physics experiments was very good,
enough to place CNAF's Tier 1 among the most productive ones in the WLCG ecosystem, as the reports of the experiments in this document show.
The activities of both the HPC clusters and the Cloud@CNAF infrastructure also resumed regular operations after the systems had been brought back to CNAF
from the sites that had temporarily hosted them.
The flooding did, however, have a beneficial side effect: it sped up the decision to find a new location for our data center.
The move was already planned in order to face the challenges of High-Luminosity LHC and of the astroparticle experiments that will begin their data acquisition
in the second half of 2020, but the dramatic event of November 2017 made the fragility and weaknesses of the current installation clear.
Moreover, during 2018 three developments matured that pave the way for a strategy towards both a new site and a new computing model,
one that includes the possibility of exploiting the computing power of HPC systems: the availability of a large area, the Bologna Tecnopolo, where
our new data center can be installed; the possibility of a joint upgrade together with the Italian supercomputing center CINECA thanks to European and Italian funding;
and additional funds from the Italian Government for a project aimed at strengthening the INFN computing infrastructures.
Our R\&D activities have proceeded regularly, meeting the expected milestones and deliverables.
In particular, the path towards a European Open Science Cloud (EOSC) has seen significant progress thanks to the EOSC-hub and EOSCpilot projects,
in both of which CNAF plays an important role. Contributions to the EOSC have also come from other H2020 projects in which we are involved,
namely XDC-eXtreme Data Cloud, which focuses mainly on data management services evolved for a context of distributed resources,
and DEEP-Hybrid DataCloud, which addresses the need to support intensive computing techniques, requiring specialized HPC hardware,
to explore very large data sets.
The External Projects and Technology Transfer (PETT) Organizational Unit has contributed to various projects in the field of computing,
communication of science, technology transfer and education. Great effort has been dedicated to the consolidation of the Technology Transfer Laboratory (INFN-TTLab),
a collaboration between CNAF and the INFN divisions of Bologna and Ferrara with the goal of promoting the transfer of our know-how towards regional enterprises.
2018 has also been the first full year in which the TTLab operated an ISO-27001 ISMS consisting of a subset of the data center resources.
This certification, which was acquired in order to qualify for storing and managing sensitive data,
could open new opportunities for exploiting our resources in the near future.
Also noteworthy is the involvement of CNAF in the INFN Cultural Heritage Network (CHNet),
where our expertise in Cloud technologies and software development is put to good use for the preparation of a digital library
where members of the network can safely store their datasets and have access to applications for their processing.
This report on the accomplishments of CNAF during 2018 arrives just at the end of 2019.
The delay is due to higher-priority commitments that overlapped with its finalization,
but we are well aware that such a situation affects its usefulness as a means of transparency towards our stakeholders
and of recognition of the hard work and dedication of the personnel of the Center.
To prevent similar situations in the future, we are already adjusting the editing process for the report on the year 2019,
and we are also planning some interesting surprises that we hope will please our readers.
\begin{flushright}
\parbox{0.7\textwidth}{
......@@ -127,7 +166,7 @@ Introducing the sixth annual report of CNAF...
%\addcontentsline{toc}{chapter}{Scientific Exploitation of CNAF ICT Resources}
%\includepdf[pages=1, pagecommand={\thispagestyle{empty}}]{papers/esperiment.pdf}
%\includepdf[pages=1, pagecommand={\thispagestyle{empty}}]{papers/experiment.pdf}
%\ip{Scientific Exploitation of CNAF ICT Resources}
......@@ -141,36 +180,38 @@ Introducing the sixth annual report of CNAF...
\phantomsection
\addcontentsline{toc}{part}{Scientific Exploitation of CNAF ICT Resources}
\addtocontents{toc}{\protect\mbox{}\protect\hrulefill\par}
%\includepdf[pages=1, pagecommand={\thispagestyle{empty}}]{papers/experiment.pdf}
\includepdf[pages=1, pagecommand={\thispagestyle{empty}}]{papers/experiment.pdf}
\cleardoublepage
\ia{User and Operational Support at CNAF}{user-support}
%\ia{ALICE computing at the INFN CNAF Tier 1}{alice}
\ia{ALICE computing at the INFN CNAF Tier 1}{alice}
\ia{AMS-02 data processing and analysis at CNAF}{ams}
%\ia{The ATLAS experiment at the INFN CNAF Tier 1}{atlas}
%\ia{The Borexino-SOX experiment at the INFN CNAF Tier 1}{borexino}
%\ia{The Cherenkov Telescope Array}{cta}
\ia{The ATLAS experiment at the INFN CNAF Tier 1}{atlas}
\ia{The Borexino experiment at the INFN-CNAF}{borexino}
\ia{The Cherenkov Telescope Array}{cta}
\ia{The CMS experiment at the INFN CNAF Tier 1}{cms}
\ia{The Belle II experiment at CNAF}{belle}
\ia{CSES-Limadou at CNAF}{limadou}
%\ia{CUORE experiment}{cuore}
%\ia{CUPID-0 experiment}{cupid}
%\ia{DAMPE data processing and analysis at CNAF}{dampe}
%\ia{DarkSide-50 experiment at CNAF}{darkside}
\ia{CUORE experiment}{cuore}
\ia{CUPID-0 experiment}{cupid}
\ia{DAMPE data processing and analysis at CNAF}{dampe}
\ia{DarkSide program at CNAF}{darkside}
%\ia{The EEE Project activity at CNAF}{eee}
\ia{TEST FOR COMMITTEE}{test}
\ia{The \emph{Fermi}-LAT experiment}{fermi}
%\ia{Fazia: running dynamical simulations for heavy ion collisions at Fermi energies}{fazia}
%\ia{The Fermi-LAT experiment}{glast}
\ia{GAMMA experiment}{gamma}
\ia{ICARUS}{icarus}
%\ia{The GERDA experiment}{gerda}
%\ia{Juno experimenti at CNAF}{juno}
\ia{Juno experiment at CNAF}{juno}
\ia{The KM3NeT neutrino telescope network and CNAF}{km3net}
%\ia{LHCb Computing at CNAF}{lhcb}
%\ia{The LHCf experiment}{lhcf}
\ia{LHCb Computing at CNAF}{lhcb}
\ia{The LHCf experiment}{lhcf}
%\ia{The LSPE experiment at INFN CNAF}{lspe}
%\ia{The NA62 experiment at CERN}{na62}
%\ia{The PADME experiment at INFN CNAF}{padme}
%\ia{XENON computing activities}{xenon}
\ia{The NA62 experiment at CERN}{na62}
\ia{The NEWCHIM activity at CNAF for the CHIMERA and FARCOS devices}{newchim}
\ia{The PADME experiment at INFN CNAF}{padme}
\ia{XENON computing model}{xenon}
\ia{Advanced Virgo computing at CNAF}{virgo}
%
% to keep together the next part title with its chapters in the toc
%\addtocontents{toc}{\newpage}
......@@ -180,62 +221,67 @@ Introducing the sixth annual report of CNAF...
\phantomsection
\addcontentsline{toc}{part}{The Tier 1 and Data center}
\addtocontents{toc}{\protect\mbox{}\protect\hrulefill\par}
%\includepdf[pages=1, pagecommand={\thispagestyle{empty}}]{papers/datacenter.pdf}
%\ia{The INFN Tier 1 data center}{tier1}
%\ia{The computing farm}{farming}
%\ia{Data management and storage systems}{storage}
\includepdf[pages=1, pagecommand={\thispagestyle{empty}}]{papers/datacenter.pdf}
\ia{The INFN Tier 1}{tier1}
\ia{The INFN-Tier 1: the computing farm}{farming}
\ia{Data management and storage systems}{storage}
%\ia{Evaluation of the ClusterStor G200 Storage System}{seagate}
%\ia{Activity of the INFN CNAF Long Term Data Preservation (LTDP) group}{ltpd}
%\ia{The INFN Tier 1: Network}{net}
\ia{The INFN-Tier 1: Network and Security}{net}
%\ia{Cooling system upgrade and Power Usage Effectiveness improvement in the INFN CNAF Tier 1 infrastructure}{infra}
%\ia{National ICT Services Infrastructure and Services}{ssnn1}
%\ia{National ICT Services hardware and software infrastructures for Central Services}{ssnn2}
%\ia{The INFN Information System}{sysinfo}
%\ia{CNAF Provisioning system: On the way to Puppet 5}{cnprov}
\ia{The INFN Information System}{sysinfo}
\ia{CNAF Provisioning system: Puppet 5 upgrade}{cnprov}
\ia{Evaluating Migration of INFN–T1 from
CREAM-CE/LSF to HTCondor-CE/HTCondor}{HTC_testbed}
\cleardoublepage
\thispagestyle{empty}
\phantomsection
\addcontentsline{toc}{part}{Research and Developments}
\addtocontents{toc}{\protect\mbox{}\protect\hrulefill\par}
%\includepdf[pages=1, pagecommand={\thispagestyle{empty}}]{papers/research.pdf}
\includepdf[pages=1, pagecommand={\thispagestyle{empty}}]{papers/research.pdf}
\cleardoublepage
%\ia{Continuous Integration and Delivery with Kubernetes}{mw-kube}
%\ia{Middleware support, maintenance and development}{mw-software}
%\ia{Evolving the INDIGO IAM service}{mw-iam}
%\ia{Esaco: an OAuth/OIDC token introspection service}{mw-esaco}
%\ia{StoRM Quality of Service and Data Lifecycle support through CDMI}{mw-cdmi-storm}
%\ia{A low-cost platform for space software development}{lowcostdev}
%\ia{Overview of Software Reliability literature}{srp}
\ia{Evolving the INDIGO IAM service}{sd_iam}
\ia{StoRM maintenance and evolution}{sd_storm}
\ia{StoRM 2: initial design and development activities}{sd_storm2}
\ia{A VOMS module for the Nginx web server}{sd_nginx_voms}
\ia{Comparing Data Mining Techniques for Software Defect Prediction}{dmsq}
%\ia{Summary of a tutorial on statistical methods}{st}
%\ia{Dynfarm: Transition to Production}{dynfarm}
%\ia{Official testing and increased compatibility for Dataclient}{dataclient}
%\ia{INDIGO-DataCloud: Overview, Results, Impact}{cloud_d}
%\ia{INDIGO-DataCloud: Software Lifecycle Management embracing DevOps philosophy}{cloud_a}
%\ia{EOSCpilot and interoperability aspects}{cloud_b}
%\ia{Cloud@CNAF - Enabling the INDIGO-DataCloud PaaS Orchestration Layer}{cloud_c}
\ia{Common software lifecycle management in external projects:}{ds_devops_pe}
\ia{EOSC-hub: contributions to project achievements}{ds_eoschub}
\ia{EOSCpilot - Interoperability aspects and results}{ds_eoscpilot}
\ia{Cloud@CNAF Management and Evolution}{ds_cloud_c}
\ia{INFN CorporateCloud: Management and evolution}{ds_infn_cc}
\ia{eXtreme DataCloud project: Advanced data management services for distributed e-infrastructures}{sdds-xdc}
\ia{DEEP-HybridDataCloud project: Hybrid services for distributed e-infrastructures}{sdds-deep}
\ia{DHLab: a digital library for the INFN Cultural Heritage Network}{chnet}
\cleardoublepage
\thispagestyle{empty}
\phantomsection
\addcontentsline{toc}{part}{Technology transfer and other projects}
\addcontentsline{toc}{part}{Technology transfer, outreach and more}
\addtocontents{toc}{\protect\mbox{}\protect\hrulefill\par}
%\includepdf[pages=1, pagecommand={\thispagestyle{empty}}]{papers/transfer.pdf}
\includepdf[pages=1, pagecommand={\thispagestyle{empty}}]{papers/transfer.pdf}
\cleardoublepage
%\ia{External projects and Technology transfer}{pett}
%\ia{The ISO 27001 Certification}{iso}
%\ia{COmputing on SoC Architectures: the COSA project at CNAF}{cosa}
%\ia{The ExaNeSt project - activities at CNAF}{exanest}
\ia{External Projects and Technology Transfer}{pett}
\ia{INFN CNAF log analysis: a first experience with summer students}{summerstudent}
\ia{The annual international conference of high performance computing: SC18 from INFN point of view}{sc18}
\ia{Infrastructures and Big Data processing as pillars in the XXXIII PhD course in Data Science and Computation}{PhD_DataScience_2018}
\ia{Internal Auditing INFN for GDPR compliance}{audit}
\cleardoublepage
\thispagestyle{empty}
\phantomsection
\addcontentsline{toc}{part}{Additional information}
\addtocontents{toc}{\protect\mbox{}\protect\hrulefill\par}
%\includepdf[pages=1, pagecommand={\thispagestyle{empty}}]{papers/additional.pdf}
\includepdf[pages=1, pagecommand={\thispagestyle{empty}}]{papers/additional.pdf}
\cleardoublepage
\phantomsection
\addcontentsline{toc}{chapter}{Organization}
\markboth{\MakeUppercase{Organization}}{\MakeUppercase{Organization}}
......@@ -253,14 +299,14 @@ Gaetano Maron
\subsection*{Scientific Advisory Panel}
\begin{tabular}{ l l p{7cm} }
\textit{Chairperson} & Michael Ernst & \textit{\small Brookhaven National Laboratory, USA} \\
& Gian Paolo Carlino & \textit{\small INFN -- Sezione di Napoli, Italy} \\
& Patrick Fuhrmann & \textit{\small Deutsches Elektronen-Synchrotron, Germany} \\
& Josè Hernandez & \textit{\small Centro de Investigaciones Energéticas, Medioambientales y Tecnológicas, Spain} \\
& Donatella Lucchesi & \textit{\small Università di Padova, Italy} \\
& Vincenzo Vagnoni & \textit{\small INFN -- Sezione di Bologna, Italy} \\
& Pierre-Etienne Macchi & \textit{\small IN2P3/CNRS, France}
\textit{Chairperson} & Eleonora Luppi & \textit{\small Università di Ferrara, Italy} \\
& Roberto Saban & \textit{\small INFN, Italy} \\
& Laura Perini & \textit{\small Università di Milano, Italy} \\
& Volker Beckman & \textit{\small IN2P3, France} \\
& Volker Guelzow & \textit{\small Deutsches Elektronen-Synchrotron, Germany} \\
& Alberto Pace & \textit{\small CERN} \\
& Eric Lancon & \textit{\small Brookhaven National Laboratory, USA} \\
& Josè Hernandez & \textit{\small Centro de Investigaciones Energéticas, Medioambientales y Tecnológicas, Spain}
\end{tabular}
% open local environment where the format of section and subsection
......@@ -304,7 +350,7 @@ Gaetano Maron
\multicolumn{2}{l}{\textbf{Software Development}} & \multicolumn{2}{l}{\textbf{Distributed Systems}}
\\[.1cm]\underline{F. Giacomini} & M. Caberletti\footnote{Until August} & \underline{C. Duma} & V.Ciaschini
\\A. Ceccanti & E. Ronchieri & A. Costantini & D. Michelotto
\\E. Vianello & N. Terranova & M. Panella\footnote{Until April} & S. Taneja\footnote{Until October}
\\E. Vianello & N. Terranova & &
\\ F. Proietti & & &
\end{tabular}
\end{savenotes}
......
\documentclass[a4paper]{jpconf}
\usepackage[english]{babel}
% \usepackage{cite}
\usepackage{biblatex}
%\bibliographystyle{abnt-num}
%%%%%%%%%% Start TeXmacs macros
\newcommand{\tmtextit}[1]{{\itshape{#1}}}
\newenvironment{itemizedot}{\begin{itemize} \renewcommand{\labelitemi}{$\bullet$}\renewcommand{\labelitemii}{$\bullet$}\renewcommand{\labelitemiii}{$\bullet$}\renewcommand{\labelitemiv}{$\bullet$}}{\end{itemize}}
%%%%%%%%%% End TeXmacs macros
\begin{document}
\title{Evaluating Migration of INFN--Tier 1 from CREAM-CE/LSF to
HTCondor-CE/HTCondor}
\author{Stefano Dal Pra$^1$}
\address{$^1$ INFN-CNAF, Bologna, IT}
\ead{stefano.dalpra@cnaf.infn.it}
\begin{abstract}
The Tier 1 data center provides computing resources to a variety of HEP and
Astrophysics experiments, organized in Virtual Organizations that submit their
jobs to our computing facilities through Computing Elements, which act as Grid
interfaces to the Local Resource Manager. We plan to phase out our current
LRMS (IBM/Platform LSF 9.1.3) and CEs (CREAM), adopting HTCondor as a
replacement for LSF and HTCondor--CE instead of CREAM. A small cluster has
been set up to practice the management of the new LRMS and CE set and to
evaluate a migration plan. This document reports on our early experience with
this setup.
\end{abstract}
\section{Introduction}
The INFN-Tier 1 currently provides about 400 kHS06 of computing power, corresponding to 35000
slots on about one thousand physical Worker Nodes. These resources are accessed
through the Grid by 24 VOs and locally by 25 user groups.
The IBM/Platform LSF 9.1.3 batch system arbitrates access among all the competing
user groups, both Grid and local, according to a \tmtextit{fairshare} policy
designed to prevent underutilization of the available resources or starvation
of lower priority groups, while ensuring a medium--term share proportional to
the configured quotas.
The CREAM--CEs act as a frontend to the underlying LSF batch system for Grid
users, submitting jobs on their behalf. This setup has proven to be an
effective solution for several years. However, the integration between CREAM
and HTCondor seems to be less tight than with LSF. Moreover, active
development of CREAM has recently ceased, so we cannot expect new versions to
be released, nor better HTCondor support to be implemented by an official
development team. Having decided to migrate our batch system from LSF to
HTCondor, we therefore also need to change our CEs. HTCondor-CE is the natural
choice, because it is maintained by the same development team as HTCondor. In
the following we report on our experience with HTCondor and HTCondor--CE.
\section{The HTCondor cluster}
To get acquainted with the new batch system and CEs, to evaluate how they can
work together and how other components, such as the monitoring, provisioning
and accounting systems, can be integrated with HTCondor and HTCondor--CE, and
finally to devise a reasonable migration plan, a small HTCondor 8.6.13 cluster
was set up during spring 2018. An HTCondor--CE was added soon after, in late
April. HTCondor is a very mature open source product, deployed at several
major Tier 1 centers for years, so we already know that it will fit our use
cases. The HTCondor--CE, on the other hand, is a more recent product, and some
of its issues might prove too problematic for us to deal with. Our focus is
therefore on ensuring that this CE implementation can be a viable solution for us.
\subsection{The testbed}
The test cluster consists of:
\begin{itemizedot}
  \item an HTCondor--CE on top of
  \item an HTCondor Central Manager and Collector
  \item 3 Worker Nodes (Compute Nodes, in HTCondor terms), with 16 slots each.
\end{itemizedot}
\subsection{HTCondor--CE Installation and setup}
The first CE installation was a bit tricky. The RPMs were available from OSG
repositories only, meaning that a number of default settings and dependencies
did not match EGI standards. Shortly after, however, HTCondor--CE RPMs were
made available in the same official repository as HTCondor.
\subsubsection{Setup}
Puppet modules are available to set up the configuration of HTCondor and
HTCondor--CE. Unfortunately, they depend on \tmtextit{hiera}, which is not
supported at our site, and are therefore incompatible with our puppet system.
They were later adapted to our configuration management system; in the
meantime, the setup was finalized by following the official documentation.
\subsubsection{Configuration}
The first configuration was completed manually. The main documentation
source for the HTCondor--CE is the OSG website~\cite{OSGDOC},
which refers to a tool, \tmtextit{osg-configure}, that is not present in the
general HTCondor--CE release. Because of this, the setup was completed
by trial and error. Once a working setup was obtained, a set of
integration notes was added to a public wiki~\cite{INFNWIKI}. This
should help other non-OSG users to get supplementary hints to
complete their installation.
\subsubsection{Accounting}
As of 2018, the official EGI accounting tool, APEL~\cite{APEL}, has no support for
HTCondor--CE. On the other hand, INFN--T1 has had a custom accounting tool in
place for several years~\cite{DGAS}. The problem therefore reduces to finding a suitable way to
retrieve from HTCondor the same information that we retrieve from CREAM--CE
and LSF.
A working way to do so has been to use python and the \tmtextit{python
bindings}, a set of API interfaces to the HTCondor daemons. These can be used
to query the SCHEDD at the CE and retrieve a specified set of data about
recently finished jobs, which are subsequently inserted into our local
accounting database. A notable fact is that the grid information
(user DN, VO, etc.) is directly available together with all the needed
accounting data. This simplifies the accounting problem, as it is no longer
necessary to collect grid data separately from the BLAH component and then
match them with the corresponding batch records.
This solution has been used during 2018 to provide accounting for the
HTCondor--CE testbed cluster.
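As an illustration, the snippet below is a minimal sketch of this approach,
assuming the python bindings are available on the CE; the attribute list and
the local database call are placeholders rather than the exact ones used in
production.
\begin{verbatim}
import htcondor

# Accounting-related attributes; the names used in production may differ.
PROJECTION = ["GlobalJobId", "Owner", "x509userproxysubject",
              "x509UserProxyVOName", "RemoteWallClockTime",
              "RemoteUserCpu", "RemoteSysCpu", "RequestCpus",
              "EnteredCurrentStatus"]

def fetch_finished_jobs(since_epoch):
    """Query the CE schedd history for jobs completed after since_epoch."""
    schedd = htcondor.Schedd()  # the schedd running on the HTCondor-CE
    constraint = "JobStatus == 4 && EnteredCurrentStatus > %d" % since_epoch
    # -1: no limit on the number of returned history ads
    for ad in schedd.history(constraint, PROJECTION, -1):
        yield {attr: ad.get(attr) for attr in PROJECTION}

for record in fetch_finished_jobs(since_epoch=1543618800):
    # insert_into_accounting_db(record)  # hypothetical local DB helper
    print(record)
\end{verbatim}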
\subsection{Running HTCondor--CE}
After some time spent becoming confident with the main configuration tasks, the
testbed began receiving jobs submitted by the four LHC experiments in
September 2018. The system proved to be stable and smooth, able to work
unattended. This confirms that it can be a reliable substitute for
CREAM--CE and LSF.
\subsection{Running HTCondor}
The HTCondor batch system is a mature product with a large user base. We have
put less effort into investigating it deeply, since we already know that most
or all of the needed features will work well. Instead, some effort has been
put into configuration management.
\subsubsection{Configuration management}
Even though a standard base of puppet classes has been adapted to our
management system, an additional python tool has been written to improve
flexibility and readiness. The tool reads and enforces, on each node of the
cluster, a set of configuration directives written in text files accessible
from a shared filesystem. The actual set of files and the order in which they
are read depend on the host role and name. In this way, a large cluster can be
easily managed as a collection of host sets. The tool is quite simple and
limited, but it can be improved as needed when more complex requirements
arise.
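The sketch below illustrates the idea behind such a tool; the directory
layout, file names and target path are hypothetical and only meant to show the
role- and host-based read order.
\begin{verbatim}
import socket
from pathlib import Path

SHARED_DIR = Path("/shared/condor-config")   # assumed shared filesystem
LOCAL_SNIPPET = Path("/etc/condor/config.d/99-managed.conf")

def directive_files(role):
    """Directive files to apply, in read order: common, then role, then host."""
    host = socket.getfqdn()
    candidates = [SHARED_DIR / "common.conf",
                  SHARED_DIR / ("role-%s.conf" % role),
                  SHARED_DIR / ("host-%s.conf" % host)]
    return [p for p in candidates if p.is_file()]

def enforce(role):
    """Rebuild the local snippet from the shared directives; later files win."""
    parts = [p.read_text() for p in directive_files(role)]
    LOCAL_SNIPPET.write_text("\n".join(parts))
    # A real tool would then trigger a condor_reconfig to apply the settings.

enforce("worker")
\end{verbatim}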
\subsection{The migration plan}
After using the testbed cluster, a possible plan for a smooth migration has
been devised:
\begin{itemizedot}
  \item Install and set up a new HTCondor cluster, with a few HTCondor--CEs
  and an initial small set of Worker Nodes
  \item Enable the LHC VOs on the new cluster
  \item Gradually add more Worker Nodes to the new cluster
  \item Enable the other Grid VOs
  \item Finally, enable local submissions. These come from a heterogeneous set
  of users, with a potentially rich set of individual needs, and can require a
  considerable administrative effort to meet all of them.
\end{itemizedot}
\subsection{Conclusion}
A testbed cluster based on HTCondor--CE on top of the HTCondor batch system
has been deployed to evaluate them as substitutes for CREAM--CE and LSF. The
evaluation has mostly focused on the HTCondor--CE, as it is the more recent
product. Apart from a few minor issues, mainly related to gaps in the
available documentation, the CE proved to be a stable component. The
possibility of performing accounting has been verified.
\section*{References}
\begin{thebibliography}{9}
\bibitem{OSGDOC} \url{https://opensciencegrid.org/docs/compute-element/install-htcondor-ce/}
\bibitem{INFNWIKI} \url{http://wiki.infn.it/progetti/htcondor-tf/htcondor-ce_setup}
\bibitem{DGAS} S. Dal Pra, ``Accounting Data Recovery. A Case Report from
INFN-T1'', internal note, INFN Commissione Calcolo e Reti (Computing and Networks Committee), {\tt CCR-48/2014/P}
\bibitem{APEL} \url{https://wiki.egi.eu/wiki/APEL}
\end{thebibliography}
\end{document}
\documentclass[a4paper]{jpconf}
\usepackage{graphicx}
\begin{document}
\title{Infrastructures and Big Data processing as pillars in the XXXIII PhD course in Data Science and Computation}
%\address{Production Editor, \jpcs, \iopp, Dirac House, Temple Back, Bristol BS1~6BE, UK}
\author{D. Salomoni$^1$, A. Costantini$^1$, C. D. Duma$^1$, B. Martelli$^1$, D. Cesini$^1$, E. Fattibene$^1$, D. Michelotto $^1$
% etc.
}
\address{$^1$ INFN-CNAF, Bologna, IT}
\ead{davide.salomoni@cnaf.infn.it}
\begin{abstract}
During the Academic Year 2017-2018 the Alma Mater Studiorum, University of Bologna (IT), activated the XXXIII PhD course in Data Science and Computation.
The course runs for four years and is devoted to students with a degree in the Mathematical, Physical, Chemical and Astronomical Sciences.
It builds upon fundamental data science disciplines to train candidates to become able to carry out academic and industrial research
at a higher level of abstraction, with final specializations in several different fields where data analysis and computation become prominent.
In this respect, INFN-CNAF was responsible for two courses: Infrastructure for Big Data processing Basic (IBDB) and Advanced (IBDA).
\end{abstract}
\section{Introduction}
During the Academic Year 2017-2018 the Alma Mater Studiorum, University of Bologna (IT), activated the XXXIII PhD course in Data Science and Computation.
The PhD course is based on a joint collaboration of the University of Bologna with Politecnico di Milano, the Golinelli Foundation, the Italian Institute
of Technology, Cineca, the ISI Foundation and INFN. Even though they are all Italian, each of these institutions has already achieved a renowned
international role in the emerging field of scientific management and processing of data. Moreover, during its lifetime the Course intends to discuss,
design and establish a series of international initiatives, including agreements with foreign Universities and Research Institutions on,
for example, joint doctoral degrees, co-tutorship and student exchanges. These activities will also build on the contribution that each
member of the Course Board will provide.
The PhD course runs for four years and is aimed at training people to carry out academic and industrial research at a level of abstraction that
builds atop the individual scientific skills which lie at the basis of the field of ``Data Science''.
Drawing on this, students with a degree in the Mathematical, Physical, Chemical and Astronomical Sciences should produce original and significant
research in terms of scientific publications and innovative applications, blending basic disciplines and finally specializing in one of the fields
listed in the following ``Curricula and Research'' topics:
\begin{itemize}
\item Quantitative Finance and Economics
\item Materials and Industry 4.0
\item Genomics and bioinformatics
\item Personalised medicine
\item Hardware and Infrastructure
\item Machine learning and deep learning
\item Computational physics
\item Big Data, Smart Cities \& Society
\end{itemize}
In this respect, INFN-CNAF was responsible for two courses, Infrastructure for Big Data processing Basic (IBDB) and Advanced (IBDA),
with Davide Salomoni in charge of both.
\section{Activities to be carried out during the Course}
At the beginning of the course each student is assigned a supervisor, a member of the Collegio dei Docenti (Faculty Board), who guides them throughout
the Ph.D. studies. The first 24 months are devoted to the integration and deepening of the student's expertise, according to a personalized learning plan
(drawn up by the student in agreement with the supervisor and then submitted to the Board for approval). The learning plan foresees reaching 40 CFU
(credits) by attending courses and passing the corresponding exams. By the 20th month from the beginning of the course the student must submit a
written thesis proposal to the Board for approval. By the end of the 24th month the student must have completed the personalized learning plan and
must report on the progress of the thesis draft. Admission from the first to the second year is considered by the Board (and approved in
the positive case) on the basis of the candidate having obtained an adequate number of CFU. Admission from the second to the third year
is considered by the Board (and approved in the positive case) if the candidate has obtained all the CFU, on the basis of a
public presentation of his/her thesis proposal. The third and fourth years are entirely devoted to the thesis work. Admission from the third
to the fourth year is considered by the Board (and approved in the positive case) on the basis of a public presentation of the
current status of the thesis. The Board finally approves admission to the final exam on the basis of the reviewers' comments. The Board may
authorize a student to spend a period in Italy at universities, research centers or companies. It is mandatory for the student to spend a period of at
least 3 months abroad during the 3rd/4th year of the course.
\section{Infrastructure for Big Data processing}
As already mentioned, the teaching units Infrastructure for Big Data processing Basic (IBDB) and Advanced (IBDA), headed by Davide Salomoni with the
support of the authors, have been an integral part of the PhD course and were included in the personalized learning plan of some PhD students.
In order to make the teaching material available and to enable an active interaction between teachers and students, a Content
Management System has been deployed. The CMS chosen for this activity is Moodle \cite{moodle}, and both courses
have been made available through it via a dedicated link (https://moodle.cloud.cnaf.infn.it/).
\subsection{Infrastructure for Big Data processing Basic}
The course is aimed at providing the basic concepts of Cloud computing at the Infrastructure-as-a-Service level. It started with an introduction to
Big Data and continued with a description of the building blocks of modern data centers and how they are abstracted by the Cloud paradigm. A real-life
computational challenge was also given, and students had to create, during the course, a cloud-based computing model to solve it. Access
to a limited set of Cloud resources and services was granted to students in order to complete the exercises. A very brief introduction to High Performance
Computing (HPC) was also given. Notions about the emerging ``fog'' and ``edge'' computing paradigms and how they are linked to Cloud infrastructures concluded the course.
The course foresees an oral exam focusing on the presented topics. Students were requested to prepare a small project to be discussed during the exam.
The IBDB course covered the following topics:
\begin{itemize}
\item Introduction to IBDB: an introduction to the course and its objectives is given to the students, together with a presentation of the computational challenges proposed during the course.
\item Datacenter building blocks: basic concepts related to batch systems, queues, allocation policies, quotas, etc., and a description of the different storage systems have been provided.
Moreover, an overview of networking, monitoring and provisioning concepts has been given.
\item Infrastructures for Parallel Computing: High Throughput vs. High Performance computing has been described and analysed.
\item Cloud Computing: an introduction to Cloud IaaS has been provided and some comparisons between public and private clouds have been given. Hands-on sessions have shown
how to use the IaaS stack layer, deploy virtual resources and create different components.
\item Creating a computing model in distributed infrastructures and multi-site Clouds: an overview of the common strategies for Job Submission, Data Management, Failover
and Disaster Recovery has been given. Moreover, a discussion on computing model creation and an introduction to the projects requested for the examination have been started.
\item Computing Continuum: an introduction to Low Power devices, Edge Computing, Fog Computing and the Computing Continuum for Big Data Infrastructures has been presented.
\end{itemize}
\subsection{Infrastructure for Big Data processing Advanced}
The course is aimed at discussing the foundations of Cloud computing and storage services beyond IaaS (PaaS and SaaS), leading the students to understand how to
exploit distributed infrastructures for Big Data processing.
The IBDA course is intended as an evolution of IBDB and, therefore, IBDB should have been completed before following it, or the student should already be familiar with the topics it covers.
At the end of the course, the students have practical and theoretical knowledge of distributed computing and storage infrastructures, cloud computing and virtualization,
parallel computing and their application to Big Data Analysis.
The course foresees an oral exam focusing on the presented topics. Students were requested to prepare a small project to be discussed during the exam.
The IBDA course covered the following topics:
\begin{itemize}
\item Introduction to IBDA: an introduction to the course and its objectives is given to the students. Moreover, a general presentation about Clouds beyond
IaaS, with the INDIGO-DataCloud architecture as a concrete example, has been discussed.
\item Authentication and Authorization: the principles of Cloud authentication and authorization (X.509, SAML, OpenID-Connect, LDAP, Kerberos, username/password,
OAuth) have been presented, with a focus on the INDIGO-IAM (Identity and Access Management) tool \cite{iam}. The session also envisaged a set of hands-on exercises related to
1) connecting to INDIGO IAM, 2) adapting a web-based application to use IAM, 3) connecting multiple AuthN methods.
\item Cloud PaaS: an overview of PaaS and related examples has been provided, together with a high-level description of the TOSCA \cite{tosca} standard for PaaS automation,
and hands-on exercises on TOSCA templates and Alien4Cloud \cite{a4c}.
\item Non-POSIX Cloud Storage: the lessons are intended to provide the students with the basic concepts of POSIX and Object storage, with practical examples and hands-on exercises on CEPH \cite{ceph}.
\item Containers: the origin of containers, Docker \cite{docker} and dockerfiles, automation with Docker Swarm and security considerations about containers are presented. Moreover,
a description of how to run docker containers in user space with udocker \cite{udocker} has been given. Hands-on exercises on how to create a container, work with docker versions and deploy a container
in a Cloud completed the session.
\item Resource orchestration: the local orchestration of resources with Kubernetes \cite{kubernetes} and Mesos \cite{mesos} has been described, with a focus on how Information
Systems and the INDIGO Orchestrator \cite{orchestrator} can be used to orchestrate resources remotely. A hands-on exercise to create and deploy an HTCondor cluster over a Cloud has also been provided to the students.
\item Distributed File Systems: the basic concepts of Storj, IPFS and Onedata have been described. For the last topic, a hands-on exercise on how to store and replicate files at multiple sites with Onedata has been provided.
\item Cloud automation: the basic concepts of configuration management automation have been described, focusing the session on the Ansible \cite{ansible} configuration manager and its relation with TOSCA templates.
\end{itemize}
\section{Conclusions}
Based on a joint collaboration of the University of Bologna with Politecnico di Milano, the Golinelli Foundation, the Italian Institute of Technology,
Cineca, the ISI Foundation and INFN, the XXXIII PhD course in Data Science and Computation has been activated.
The course is aimed at training people to carry out academic and industrial research at a level of abstraction
that builds atop the individual scientific skills which lie at the basis of the field of Data Science.
As part of the PhD course, the teaching units Infrastructure for Big Data processing Basic (IBDB) and Advanced (IBDA) have
been included in the personalized learning plan of some PhD students. The teaching units are aimed at providing the foundations of Cloud
computing and storage services beyond IaaS (PaaS and SaaS), leading the students to understand how to exploit distributed infrastructures for Big Data processing.
As an expected result, original, relevant and significant research activities are due by the end of the Course; these can take different forms, including
for example scientific publications, system and software design, realization and production, and innovative applications specializing in a
broad gamut of topics, such as Quantitative Finance and Economics; Materials and Industry 4.0; Genomics and bioinformatics; Personalised
medicine; Hardware and Infrastructure; Machine learning and deep learning; Computational physics; Big Data, Smart Cities \& Society.
\section{References}
\begin{thebibliography}{}
\bibitem{moodle}
Web site: https://moodle.org
\bibitem{iam}
Web site: https://www.indigo-datacloud.eu/identity-and-access-management
\bibitem{tosca}
Web site: https://github.com/indigo-dc/tosca-types
\bibitem{a4c}
Web site: https://github.com/indigo-dc/alien4cloud-deep
\bibitem{ceph}
Web site: https://ceph.com
\bibitem{docker}
Web site: https://www.docker.com/
\bibitem{udocker}
Web site: https://github.com/indigo-dc/udocker
\bibitem{kubernetes}
Web site: https://kubernetes.io/
\bibitem{mesos}
Web site: http://mesos.apache.org
\bibitem{orchestrator}
Web site: https://www.indigo-datacloud.eu/paas-orchestrator
\bibitem{ansible}
Web site: https://www.ansible.com
\end{thebibliography}
\end{document}
File added
This diff is collapsed.
contributions/alice/network_traffic_cnaf_se_2018.png

146 KiB

contributions/alice/raw_data_accumulation_run2.png

66.2 KiB

contributions/alice/running_jobs_CNAF_2018.png

122 KiB

contributions/alice/running_jobs_per_users_2018.png

182 KiB

contributions/alice/total_traffic_cnaf_tape_2018.png

65.1 KiB

contributions/alice/wall_time_tier1_2018.png

70.5 KiB

......@@ -17,19 +17,12 @@
\begin{abstract}
AMS is a large acceptance instrument conceived to search for anti-particles (positrons, anti-protons, anti-deuterons) coming from dark matter
annihilation, primordial anti-matter (anti-He or light anti nuclei) and to
perform accurate measurements in space of the cosmic radiation in the GeV-TeV
energy range.
Installed on the International Space Station (ISS) in mid-May 2011, it is
operating continuously since then, with a collected statistics of $\sim$ 130
billion events up to the end of 2018.
CNAF is one of the repositories of the full AMS data set and contributes to the
data production and Monte Carlo simulation of the international collaboration.
It represents the central computing resource for the data analysis performed by
Italian collaboration.
In the following, the AMS computing framework, the role of the CNAF computing
center and the use of the CNAF resources in 2018 will be given.
AMS is a large acceptance Cosmic Ray (CR) detector operating in space, on board the International Space Station (ISS), since the 19$^{\textrm{th}}$ of May 2011.\\
%AMS is a large acceptance instrument conceived to search for anti-particles (positrons, anti-protons, anti-deuterons) coming from dark matter annihilation, primordial anti-matter (anti-He or light anti nuclei) and to perform accurate measurements in space of the cosmic radiation in the GeV-TeV energy range.
%Installed on the International Space Station (ISS) in mid-May 2011, it is operating continuously since then, with a collected statistics of $\sim$ 130 billion events up to the end of 2018.
CNAF is one of the repositories of the full AMS data set and contributes to the data production and Monte Carlo simulation of the international collaboration. It represents the central computing resource for the data analysis performed by the Italian collaboration, and its role will be reviewed in this document.
In the following, the AMS computing framework, the role of the CNAF computing center and the use of the CNAF resources in 2018 will be described.\\
In addition, the ongoing R\&D activities to integrate cloud resources into this framework will be discussed.
\end{abstract}
\section{Introduction}
......@@ -93,7 +86,7 @@ The data reconstruction pipeline is mainly composed by two logical step:
\begin{itemize}
\item[1)]{
the {\bf First Production} runs continuously over incoming data doing an
initial validation and indexing. It produces the so called "standard" (STD)
initial validation and indexing. It produces the so called ``standard'' (STD)
reconstructed data stream, ready within two hours after data are received at
CERN, that is used to calibrate different sub-detectors as well as to monitor
off-line the detector performances. In this stage Data Summary Files are
......@@ -107,7 +100,7 @@ The data reconstruction pipeline is mainly composed by two logical step:
to the STD data sample, every 6 months, the time needed to produce and
certify the calibrations. A full reprocessing of all AMS data is carried
out periodically in case of major software updates, providing the so
called "pass" production. Up to 2018 there were 7 full data reproductions
called ``pass'' production. Up to 2018 there were 7 full data reproductions
done. The last published measurements were based on the pass6 data set, but all the analyses being carried out for the next publications are based on the pass7 ones.
}
\end{itemize}
......@@ -138,7 +131,7 @@ CNAF is the main computing resource for data analysis of the AMS Italian collabo
A full copy of the AMS raw data is preserved on tape, while, usually, the latest production and part of the Monte Carlo sample are available on disk.
More than 30 users routinely perform the bulk of their analysis at CNAF, transferring only reduced data sets or histograms to local sites (i.e. their small local computing farms or their laptops).
As described in the following, during 2018, the possibility of a XRootD endpoint at CNAF has been explored. The goal is to federate, through XRootD, the $\sim$ 5 PB available for the AMS Collaboration at CERN, with the $\sim$ 2 PB at CNAF. In this picture, CNAF will be the second data center to share its disk space togheter with the one available for the collaboration, large-scale optimizing it.
As described in the following, during 2018 the possibility of an XRootD endpoint at CNAF has been explored. The goal is to federate, through XRootD, the $\sim$ 5 PB available to the AMS Collaboration at CERN with the $\sim$ 2 PB at CNAF. In this picture, CNAF will be the second data center to share its disk space together with the one available to the collaboration, optimizing it on a large scale.
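As a sketch of what such a federation would mean for an analysis user, the snippet below reads a file through an XRootD endpoint with the python bindings; the host name and file path are purely illustrative placeholders.
\begin{verbatim}
from XRootD import client
from XRootD.client.flags import OpenFlags

ENDPOINT = "root://xrootd.example.cnaf.infn.it:1094"   # hypothetical endpoint
PATH = "/ams/Data/pass7/run_1234.root"                 # hypothetical file

# Check that the file is visible through the endpoint (or a redirector).
fs = client.FileSystem(ENDPOINT)
status, info = fs.stat(PATH)
print(status.ok, info.size if status.ok else None)

# Read the first kilobyte remotely, without copying the whole file locally.
f = client.File()
status, _ = f.open(ENDPOINT + "/" + PATH, OpenFlags.READ)
if status.ok:
    status, data = f.read(offset=0, size=1024)
    print(len(data))
f.close()
\end{verbatim}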
\section{Data processing strategy at CNAF}
......@@ -216,7 +209,7 @@ Different analysis are carried on by the Italian collaboration. In 2018, the CNA
\subsection*{Research and Development}
\label{ReD}
As mentioned above, during 2017 AMS started evaluating the technical feasibility of integrating also cloud resources (possibly seamlessly) in order to primarily benefit of external computing resources, meant as opportunistic resources. The architectural model foreseen is that all AMS data are and will be hosted at CNAF. Possible cloud compute resources should be able to remotely access data (might be caching locally for the sake of the I/O optimization) and produced data (namely output files) should be moved into the CNAF storage.\\
AMS work-flow has been successfully integrated in DODAS (Dynamic On Demand Analysis Service, a thematic service funded by the EOSC-hub European project) and the work-flow has been validated and consolidated during 2018. The success of the validation tests performed over HelixNebula Science Cloud provided resources and over Google Cloud INFN grant motivate further exploitation as well as evolution of the strategy. In total in 2018 the Italian collaboration benefited of more than 4\textit{\,k\,HS06\,yr} of opportunistic resources, that represent $\sim$ 20\% of the ones obtained from CNAF.\\
The AMS work-flow has been successfully integrated in DODAS (Dynamic On Demand Analysis Service, a thematic service funded by the EOSC-hub European project \cite{DODAS}) and has been validated and consolidated during 2018. The success of the validation tests performed on the resources provided by the HelixNebula Science Cloud and by the Google Cloud INFN grant motivates further exploitation as well as evolution of the strategy. In total, in 2018 the Italian collaboration benefited from more than 4\textit{\,k\,HS06\,yr} of opportunistic resources, which represent $\sim$ 20\% of the ones obtained from CNAF.\\
More in detail, during 2019 the plan is to consolidate the usage of the INFN on-premises cloud providers, namely Cloud@ReCaS Bari and Cloud@CNAF, in the context of DODAS: consolidation by means of improved I/O management using emerging solutions for data caching, as well as by starting to exploit geographically distributed clusters.\\
The latter is about exploiting DODAS-based solutions to create a single logical cluster running over any available resource provider. The desired solution is to allow users to submit jobs from, e.g., a CNAF-provided User Interface to a single queue and to allow dynamic clusters to fetch payloads in a secure way, transparent to the end user.\\
From a technical perspective, the distributed cluster implementation will be based on HTCondor technology, which is an important strategic aspect because we expect it will later allow a completely seamless integration with the batch system of the CNAF Tier 1.
......@@ -269,7 +262,12 @@ The goal by the end of 2019 is to bring the ASI-SSDC hosted computing resources
Phys.\ Rev.\ Lett.\ {\bf 122} (2019) no.10, 101101.
doi:\url{10.1103/PhysRevLett.122.101101}
\bibitem{dst} D. D'Urso \& M. Duranti, Journal of Physics: Conference Series, 664 (2015), 072016
\bibitem{dst} D. D'Urso \& M. Duranti, Journal of Physics: Conference Series, 664 (2015), 072016
\bibitem{DODAS}
D. Spiga {\it et al.}
%“DODAS: How to effectively exploit heterogeneous clouds for scientific computations”,
PoS(ISGC-2018 \& FCDD) {\bf 024} doi:\url{https://doi.org/10.22323/1.327.0024}
%\bibitem{xrootd} http://xrootd.org.
......
\documentclass[a4paper]{jpconf}
\usepackage{graphicx}
\begin{document}
\title{The ATLAS Experiment at the INFN CNAF Tier 1}
\author{A. De Salvo$^1$, L. Rinaldi$^2$}
\address{$^1$ INFN Sezione di Roma-1, Roma, IT}
\address{$^2$ Universit\`a di Bologna e INFN Sezione di Bologna, Bologna, IT}
\ead{alessandro.desalvo@roma1.infn.it, lorenzo.rinaldi@bo.infn.it}
\begin{abstract}
The ATLAS experiment at LHC was fully operating in 2017. In this contribution we describe the ATLAS computing activities performed in the Italian sites of the Collaboration, and in particular the utilisation of the CNAF Tier 1.
\end{abstract}
\section{Introduction}
ATLAS \cite{ATLAS-det} is one of two general-purpose detectors at the Large Hadron Collider (LHC). It investigates a wide range of physics, from the search for the Higgs boson and standard model studies to extra dimensions and particles that could make up dark matter. Beams of particles from the LHC collide at the center of the ATLAS detector, producing collision debris in the form of new particles, which fly out from the collision point in all directions. Six different detecting subsystems arranged in layers around the collision point record the paths, momentum, and energy of the particles, allowing them to be individually identified. A huge magnet system bends the paths of charged particles so that their momenta can be measured. The interactions in the ATLAS detector create an enormous flow of data. To digest the data, ATLAS uses an advanced trigger system to tell the detector which events to record and which to ignore. Complex data-acquisition and computing systems are then used to analyse the collision events recorded. At 46 m long, 25 m high and 25 m wide, the 7000-tonne ATLAS detector is the largest volume particle detector ever built. It sits in a cavern 100 m below ground near the main CERN site, close to the village of Meyrin in Switzerland.
More than 3000 scientists from 174 institutes in 38 countries work on the ATLAS experiment.
ATLAS has been taking data from 2010 to 2012, at center of mass energies of 7 and 8 TeV, collecting about 5 and 20 fb$^{-1}$ of integrated luminosity, respectively. During the complete Run-2 phase (2015-2018) ATLAS collected and registered at the Tier 0 147 fb$^{-1}$ of integrated luminosity at center of mass energies of 13 TeV.
The experiment has been designed to look for New Physics over a very large set of final states and signatures, and for precision measurements of known Standard Model (SM) processes. Its most notable result up to now has been the discovery of a new resonance at a mass of about 125 GeV \cite{ATLAS higgs}, followed by the measurement of its properties (mass, production cross sections in various channels and couplings). These measurements have confirmed the compatibility of the new resonance with the Higgs boson, foreseen by the SM but never observed before.
\section{The ATLAS Computing System}
The ATLAS Computing System \cite{ATLAS-cm} is responsible for the provision of the software framework and services, the data management system, user-support services, and the world-wide data access and job-submission system. The development of detector-specific algorithmic code for simulation, calibration, alignment, trigger and reconstruction is under the responsibility of the detector projects, but the Software and Computing Project plans and coordinates these activities across detector boundaries. In particular, a significant effort has been made to ensure that relevant parts of the “offline” framework and event-reconstruction code can be used in the High Level Trigger. Similarly, close cooperation with Physics Coordination and the Combined Performance groups ensures the smooth development of global event-reconstruction code and of software tools for physics analysis.
\subsection{The ATLAS Computing Model}
The ATLAS Computing Model embraces the Grid paradigm and a high degree of decentralisation and sharing of computing resources. The required level of computing resources means that off-site facilities are vital to the operation of ATLAS in a way that was not the case for previous CERN-based experiments. The primary event processing occurs at CERN in a Tier 0 Facility. The RAW data is archived at CERN and copied (along with the primary processed data) to the Tier 1 facilities around the world. These facilities archive the raw data, provide the reprocessing capacity, provide access to the various processed versions, and allow scheduled analysis of the processed data by physics analysis groups. Derived datasets produced by the physics groups are copied to the Tier 2 facilities for further analysis. The Tier 2 facilities also provide the simulation capacity for the experiment, with the simulated data housed at Tier 1 centers. In addition, Tier 2 centers provide analysis facilities, and some provide the capacity to produce calibrations based on processing raw data. A CERN Analysis Facility provides an additional analysis capacity, with an important role in the calibration and algorithmic development work. ATLAS has adopted an object-oriented approach to software, based primarily on the C++ programming language, but with some components implemented using FORTRAN and Java. A component-based model has been adopted, whereby applications are built up from collections of plug-compatible components based on a variety of configuration files. This capability is supported by a common framework that provides common data-processing support. This approach results in great flexibility in meeting both the basic processing needs of the experiment, but also for responding to changing requirements throughout its lifetime. The heavy use of abstract interfaces allows for different implementations to be provided, supporting different persistency technologies, or optimized for the offline or high-level trigger environments.
The Athena framework is an enhanced version of the Gaudi framework that was originally developed by the LHCb experiment, but is now a common ATLAS-LHCb project. Major design principles are the clear separation between data and algorithms, and between transient (in-memory) and persistent (in-file) data. All levels of processing of ATLAS data, from high-level trigger to event simulation, reconstruction and analysis, take place within the Athena framework; in this way it is easier for code developers and users to test and run algorithmic code, with the assurance that all geometry and conditions data will be the same for all types of applications (simulation, reconstruction, analysis, visualization).
One of the principal challenges for ATLAS computing is to develop and operate a data storage and management infrastructure able to meet the demands of a yearly data volume of O(10PB) utilized by data processing and analysis activities spread around the world. The ATLAS Computing Model establishes the environment and operational requirements that ATLAS data-handling systems must support and provides the primary guidance for the development of the data management systems.
The ATLAS Databases and Data Management Project (DB Project) leads and coordinates ATLAS activities in these areas, with a scope encompassing technical databases (detector production, installation and survey data), detector geometry, online/TDAQ databases, conditions databases (online and offline), event data, offline processing configuration and bookkeeping, distributed data management, and distributed database and data management services. The project is responsible for ensuring the coherent development, integration and operational capability of the distributed database and data management software and infrastructure for ATLAS across these areas.
The ATLAS Computing Model defines the distribution of raw and processed data to Tier 1 and Tier 2 centers, so as to be able to exploit fully the computing resources that are made available to the Collaboration. Additional computing resources are available for data processing and analysis at Tier 3 centers and other computing facilities to which ATLAS may have access. A complex set of tools and distributed services, enabling the automatic distribution and processing of the large amounts of data, has been developed and deployed by ATLAS in cooperation with the LHC Computing Grid (LCG) Project and with the middleware providers of the three large Grid infrastructures we use: EGI, OSG and NorduGrid. The tools are designed in a flexible way, so that they can be extended in the future to use other types of Grid middleware.
The main computing operations that ATLAS has to run comprise the preparation, distribution and validation of ATLAS software, and the computing and data management operations run centrally on Tier 0, Tier 1 and Tier 2 sites. The ATLAS Virtual Organization allows production and analysis users to run jobs and access data at remote sites using the ATLAS-developed Grid tools.
The Computing Model, together with the knowledge of the resources needed to store and process each ATLAS event, gives rise to estimates of required resources that can be used to design and set up the various facilities. It is not assumed that all Tier 1 sites or Tier 2 sites are of the same size; however, in order to ensure a smooth operation of the Computing Model, all Tier 1 centers usually have broadly similar proportions of disk, tape and CPU, and similarly for the Tier 2 sites.
The organization of the ATLAS Software and Computing Project reflects all areas of activity within the project itself. Strong high-level links are established with other parts of the ATLAS organization, such as the TDAQ Project and Physics Coordination, through cross-representation in the respective steering boards. The Computing Management
Board, and in particular the Planning Officer, acts to make sure that software and computing developments take place coherently across sub-systems and that the project as a whole meets its milestones. The International Computing Board assures the information flow between the ATLAS Software and Computing Project and the national resources and their Funding Agencies.
\section{The role of the Italian Computing facilities in the global ATLAS Computing}
Italy provides Tier 1, Tier 2 and Tier 3 facilities to the ATLAS collaboration. The Tier 1, located at CNAF, Bologna, is the main center, also referred to as the “regional” center. The Tier 2 centers are distributed in different areas of Italy, namely in Frascati, Napoli, Milano and Roma. All 4 Tier 2 sites are considered Direct Tier 2 (T2D) sites, meaning that they have a higher importance with respect to normal Tier 2 sites and can also host primary data. They are also considered satellites of the Tier 1, which is identified as the nucleus. In terms of disk and CPU, the Tier 2 sites together correspond to more than the total ATLAS share at the Tier 1; tape is not available at the Tier 2 sites. A third category of sites is the so-called Tier 3 centers. These are smaller centers, scattered in different places in Italy, that nevertheless contribute in a consistent way to the overall computing power, in terms of disk and CPU. The overall size of the Tier 3 sites corresponds roughly to the size of a Tier 2 site. The Tier 1 and Tier 2 sites have pledged resources, while the Tier 3 sites do not have any pledged resources.
In terms of pledged resources, Italy contributes 9\% of both CPU and disk of the ATLAS Tier 1 resources. The share of the Tier 2 facilities corresponds to 7\% of disk and 9\% of CPU of the whole ATLAS computing infrastructure. The Italian Tier 1, together with the other Italian centers, provides both resources and expertise to the ATLAS computing community, and manages the so-called Italian Cloud of computing. Since 2015 the Italian Cloud includes not only Italian sites, but also Tier 3 sites of other countries, namely South Africa and Greece.
The computing resources, in terms of disk, tape and CPU, available at the Tier 1 at CNAF have been very important for all kinds of activities, including event generation, simulation, reconstruction, reprocessing and analysis, for both Monte Carlo and real data. Its major contribution has been data reprocessing, since this is a very I/O- and memory-intensive operation, normally executed only at Tier 1 centers. In this sense CNAF has played a fundamental role for the fine measurement of the Higgs properties [3] in 2018 and for other analyses. The Italian centers, including CNAF, have been very active not only on the operations side, but have also contributed substantially to various aspects of the computing of the ATLAS experiment, in particular concerning the network, the storage systems, the storage federations and the monitoring tools. The Tier 1 at CNAF has been very important for the ATLAS community in 2018, for some specific activities:
\begin{itemize}
\item improvements of the WebDAV/HTTPS access to StoRM, so that it can be used as the main renaming method for ATLAS files in StoRM and for HTTP federation purposes (a minimal, hypothetical sketch of such a WebDAV rename is shown after this list);
\item improvements of the dynamic model of the multi-core resources operated via the LSF resource management system and simplification of the PanDA queues, using the Harvester service to mediate the control and information flow between PanDA and the resources;
\item network troubleshooting via the perfSONAR-PS network monitoring system, used for the LHCONE overlay network, together with the other Tier 1 and Tier 2 sites;
\item planning, readiness testing and implementation of the HTCondor batch system for the management of the farm resources.
\end{itemize}
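As a minimal illustration of the WebDAV-based renaming mentioned in the first item above, the following Python sketch issues an HTTP MOVE request against a StoRM WebDAV endpoint; the endpoint URL, file paths and proxy location are hypothetical examples and do not correspond to the actual ATLAS tooling.
\begin{verbatim}
# Sketch only: rename a file through a StoRM WebDAV endpoint.
# Endpoint, paths and proxy location are hypothetical examples.
import os
import requests

ENDPOINT = "https://storm.example.infn.it:8443"
SRC = ENDPOINT + "/atlasdatadisk/rucio/tmp/file.root.part"
DST = ENDPOINT + "/atlasdatadisk/rucio/data/file.root"

proxy = os.environ.get("X509_USER_PROXY", "/tmp/x509up_u1000")

# WebDAV defines MOVE with a Destination header for
# server-side renames; requests can issue arbitrary methods.
resp = requests.request(
    "MOVE",
    SRC,
    headers={"Destination": DST, "Overwrite": "F"},
    cert=proxy,  # client authentication with a proxy certificate
    verify="/etc/grid-security/certificates",
)
resp.raise_for_status()
print("rename completed:", resp.status_code)
\end{verbatim}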
\section{Main achievements of ATLAS Computing centers in Italy}
The Italian Tier 2 Federation runs all the ATLAS computing activities in the Italian cloud, supporting the operations at CNAF, the Italian Tier 1 center, and at the Milano, Napoli, Roma1 and Frascati Tier 2 sites. This ensures an optimized use of the resources and fair and efficient data access. The computing activities of the ATLAS collaboration have been carried out constantly over the whole of 2018, in order to analyse the Run-2 data and produce the Monte Carlo samples needed for the 2018 run.
The LHC data taking started in April 2018 and, until the end of operations in December 2018, all the Italian sites, the CNAF Tier 1 and the four Tier 2 sites, have been involved in all the computing operations of the collaboration: data reconstruction, Monte Carlo simulation, user and group analysis and data transfer among all the sites. Besides these activities, the Italian centers have contributed to the upgrade of the Computing Model, both on the testing side and through the work of specific working groups. ATLAS collected and registered at the Tier 0 about 60.6 fb$^{-1}$ and about 25 PB of raw and derived data, while the cumulative data volume distributed to all the data centers on the grid was of the order of 80 PB. The data have been replicated with an efficiency of 100\% and an average throughput of the order of 13 GB/s during the data taking period, with peaks above 25 GB/s. For Italy alone, the average throughput was of the order of 800 MB/s, with peaks above 2 GB/s. The data replication from Tier 0 to the Tier 2 sites has been quite fast, with transfer times below 4 hours. The average number of simultaneous jobs running on the grid has been about 110k for production (simulation and reconstruction) and data analysis, with peaks over 150k and an average CPU efficiency of more than 80\%. The use of the grid for analysis has been stable at about 26k simultaneous jobs, with peaks over 40k around conference periods, showing the reliability and effectiveness of the grid tools for data analysis.
The Italian sites contributed to the development of the Xrootd and HTTP/WebDAV federations. In the latter case the access to the storage resources is managed using the HTTP/WebDAV protocol, in collaboration with the CERN DPM team, the Belle2 experiment, the Canadian Corporate Cloud and the RAL (UK) site. The purpose is to build a reliable storage federation, alternative to the Xrootd one, to access physics data both on the grid and on cloud storage infrastructures (like Amazon S3, Microsoft Azure, etc.). The Italian community is particularly involved in this project and the first results have been presented to the WLCG collaboration.
The Italian community also contributes to the development of new tools for distributed data analysis and management. Another topic of interest is the usage of new computing technologies: in this field the Italian community contributed to the development and testing of muon tracking algorithms in the ATLAS High Level Trigger, using GPGPUs. Other topics in which the Italian community is involved are Machine Learning/Deep Learning for both analysis and Operational Intelligence, and their application to the experiment software and infrastructure using accelerators such as GPGPUs and FPGAs.
The contribution of the Italian sites to the computing activities, in terms of processed jobs and recorded data, has been about 9\%, in line with the resources pledged to the collaboration, with very good performance in terms of availability, reliability and efficiency. All the sites are consistently in the top positions in the ranking of the collaboration sites.
Besides the Tier 1 and Tier 2 sites, in 2018 also the Tier 3 sites gave a significant contribution to the Italian physics community for data analysis. The Tier 3 centers are local farms dedicated to interactive data analysis, the last step of the analysis workflow, and to grid analysis over small data samples. Several Italian groups have set up such farms at their universities and, after a testing and validation process performed by the distributed computing team of the collaboration, all of them have been recognized as official Tier 3 sites of the collaboration.
\section{Impact of CNAF flooding incident on ATLAS computing activities}
The ATLAS Computing Model was designed with sufficient redundancy of the available resources to tackle emergency situations like the flooding that occurred at CNAF on November 9th, 2017. Thanks to the huge effort of the whole CNAF community, the data center gradually resumed operations from the second half of February 2018. A continuous interaction between the ATLAS distributed computing community and the CNAF staff was needed to bring the computing operations fully back to normal. This close collaboration was very successful: after one month the site was almost fully operational and the ATLAS data management and processing activities were running smoothly again. Eventually, the overall impact of the incident was rather limited, mainly thanks to the relatively quick recovery of the CNAF data center and to the robustness of the computing model.
\section*{References}
\begin{thebibliography}{9}
\bibitem{ATLAS-det} ATLAS Collaboration, ATLAS Computing Technical Design Report, ATLAS-TDR-017, CERN-LHCC-2005-022, June 2005
\bibitem{ATLAS higgs} ATLAS Collaboration, Observation of a new particle in the search for the Standard Model Higgs boson with the ATLAS detector at the LHC, Phys. Lett. B {\bf 716} (2012) 1-29
\bibitem{ATLAS-cm} R W L Jones and D Barberis, The evolution of the ATLAS computing model, 2010 J. Phys.: Conf. Ser. {\bf 219} 072037, doi:10.1088/1742-6596/219/7/072037
\end{thebibliography}
\end{document}
\documentclass[a4paper]{jpconf}
\usepackage{graphicx}
\bibliographystyle{iopart-num}
\begin{document}
\title{Internal Auditing INFN for GDPR compliance}
\author{V.~Ciaschini, P.~Belluomo}
\address{INFN CNAF, Viale Berti Pichat 6/2, 40127, Bologna, Italy}
\address{INFN sezione di Catania, Via Santa Sofia 64, 95123, Catania, Italy}
\begin{abstract}
With the General Data Protection Regulation (GDPR) coming into
force, INFN had to decide how to implement its principles and
requirements. To monitor their application and in general INFN's
compliance with GDPR, INFN created a new group, called ``Compliance
Auditing,'' whose job is to be internal auditors for all structures.
This article describes the startup activity for the group.
\end{abstract}
\section{Compliance Auditing Group}
\subsection{GDPR Introduction}
The GDPR, or EU Regulation 2016/679, is a European regulation which aims to govern the collection and use of personal data. It introduces several innovations compared to the previous legislation dealing with data protection.
The GDPR predicates its data management philosophy on a few high-level principles, namely \emph{lawfulness}, \emph{fairness}, \emph{transparency}, \emph{purpose limitation}, \emph{data minimisation}, \emph{accuracy}, \emph{storage limitation}, \emph{integrity and confidentiality} and finally \emph{accountability}, which are clearly delineated in the second chapter, with the rest of the law further characterizing and contextualizing them.
Before delving further, it is important to correctly define these principles:
\begin{itemize}
\item Lawfulness means that at any moment in time there must be a valid legal justification for the treatment of personal data. This can be an existing law that specifically allows a treatment, or one of a set of reasons explicitly listed in the GDPR itself. If none of those applies, lawfulness may be granted by an explicit permission from the owner of the data, permission that is only valid for the specific treatment for which it was obtained. Any further usage of the data requires a new explicit permission.
\item Fairness and transparency mean that any usage of data must be known to the owner, and such usage must be ``fair.''
\item Purpose limitation means that data collected for a specific
purpose \emph{cannot} be used for any other purpose without an
explicit authorization.
\item Data minimization means that only the data that is relevant for
the purpose for which it is collected must be collected and kept.
\item Accuracy means that the data should be accurate and, if necessary, kept up-to-date. Data that is inaccurate should be deleted or corrected.
\item Storage limitation means that data should not be kept in a form
that permits personal identification for longer than is required by
the purpose for which it was collected.
\item Integrity and confidentiality mean that all collected data must be kept confidential for as long as they are retained, and that they should be preserved in a form that protects them from corruption. Furthermore, measures must be taken to prevent disclosure to unauthorized persons, even accidental disclosure or disclosure as a consequence of a crime.
\item Accountability means that the entity or entities that decide for what purpose data is collected and how it is processed are responsible for, and must be able to demonstrate, compliance with the GDPR.
\end{itemize}
The GDPR does not describe how, exactly, these principles should be implemented in practice, leaving instead to the entities that are accountable for respecting it full freedom in deciding how to satisfy them.
The disruptive effect of the regulation is therefore clear when compared to the pre-existing Italian Privacy Law (NNN), which instead clearly described a set of rules that \emph{had} to be respected.
This means that the organization needs to implement a set of regulations and instruments, and to organize people with the responsibilities and skills to handle, manage and check the treatment of personal data.
One organizational measure is explicitly mandated by the GDPR: the position of the Data Protection Officer (DPO), the organization's representative who manages dealings with external entities pertaining to personal data issues. The DPO also has a consultative and reference role for the organization and all users when dealing with privacy issues.
The implementation of GDPR conformance rests on five concepts that build and depend on each other like a wheel, as can be seen in the following figure:
\includegraphics[width=.9\linewidth]{image.png}
\subsubsection{Organization and Roles}
Starting from the appointment of a DPO, the organizational model must be formally defined and organized in all its components, precisely defining roles and responsibilities for the members of the organization with regard to the direction and management of all privacy-related issues.
\subsubsection{People, Culture and Skills}
The organization designs and spreads the culture of data protection and security policies through training and other awareness activities.
\subsubsection{Processes and Rules}
Starting from a culture of security and data protection, processes and rules are designed and implemented to ensure privacy by design, data portability, data breach management, the data treatment register and the other elements whose existence is mandated by the GDPR.
\subsubsection{Technologies and Tools}
Technologies and tools to implement the processes and rules defined in the previous point (e.g. antivirus, firewalls, encryption algorithms, identity management) are chosen and put into production.
\subsubsection{Control System}
A monitoring system must be created to oversee the compliance of the organization with laws (e.g. the GDPR) and with internal processes and regulations. A key tool of this monitoring is the execution of audits, internal or external.
\subsection{Rationale for creation}
Since the required vigilance is a fundamental process for GDPR compliance, a group of people was formed in INFN, called ``Compliance Auditing INFN'', whose duty is the verification of the compliance of the actual behaviour of the different INFN structures with both external (e.g. GDPR) and internal (e.g. Norme per l'uso delle risorse informatiche) norms.
Proposed in the first half of 2018, the group is staffed by Patrizia Belluomo (Lead Auditor, INFN Catania) and Vincenzo Ciaschini (Auditor, INFN CNAF), who both have auditing experience and hold ISO 27001 certifications.
\subsection{Startup Activity}
\subsubsection{Operative Plan}
The first activity undertaken by the group was the collection and study of all the norms applicable to INFN's implementation of the GDPR: the text of the regulation itself, other applicable Italian legislation, the documents describing INFN's implementation, and several INFN regulations that, while not specifically about the GDPR, still govern related issues, e.g. data retention policies. This preparation for the audits took several months. From this study we extrapolated a set of indications, a ``possible requests index'', that we shared with all of the INFN structures that would receive an audit.
Another fundamental step, which in this case preceded the foundation of the group, was the compilation by each INFN structure of a file (Minimal Measure Implementation) describing the implementation of a minimal set of security controls; these files were collected in an official CCR repository together with the additional information that each structure deemed necessary.
We identified four main points that we had to evaluate:
\begin{description}
\item[Data Discovery and Risk Assessment] Identify the personal data
kept by the structure and where it was stored or used.
\item[Protection] How the data is protected.
\item[Vigilance] How possible threats are discovered and how to
evaluate the extent of a security violation.
\item[Response] Incident Response Preparation and actions to
mitigate impact and reduce future risk.
\end{description}
\subsubsection{Official Documents Preparation}
We decided to implement the audits according to the well-known principles of the ISO 19011 standard. To adapt them to INFN's specificities, we created a set of documents and procedures that would ensure uniformity of judgement and make results directly comparable among successive audits.
We also set up a document repository, which contains both the official documentation and the results of all the audits performed on the various structures. It is hosted on Alfresco, INFN's official document management system.
\subsubsection{Audit Planning}
According to the formal procedure, audit plans for the INFN structures were formalized and scheduled, and we started contacting the various parts of INFN to share them. The plan was to complete all audits in the first half of 2019, starting from January. Budgetary reasons forbade physically traveling to all the cities hosting INFN structures, so most of the audits were planned to be done in telepresence, with on-site visits reserved for the 5 structures that host the most delicate data.
\subsubsection{Structure Feedback}
Finally, we defined a procedure to control the actions to undertake
during the audit and how to receive feedback from INFN structures.
\section{Conclusion}
With all this work done, we were ready to start; the first audit began in earnest on January 9, 2019, but that is outside the scope of this 2018 Annual Report.
\end{document}
\documentclass[a4paper]{jpconf}
\usepackage{graphicx}
\begin{document}
\title{The Borexino experiment at INFN-CNAF}
\author{Alessandra Carlotta Re$^1$\\ \small{on behalf of the BOREXINO collaboration}}
\address{$^1$ Universit\`{a} degli Studi di Milano e INFN Sezione di Milano, Milano, IT}
\ead{alessandra.re@mi.infn.it}
\begin{abstract} %OK
Almost all the energy from the Sun is produced through sequences of nuclear reactions that convert hydrogen into helium. Five of these processes emit neutrinos and represent a unique probe of the Sun's inner workings.
Borexino is a large volume liquid scintillator experiment designed for low energy neutrino detection, installed at the National
Laboratory of Gran Sasso (Assergi, Italy) and operating since May 2007.
Given the tiny cross-section of neutrino interactions with electrons ($\sigma \approx 10^{-44}\,-\,10^{-45}~\mathrm{cm}^2$ for the solar neutrino energy range), the expected rates in Borexino are very small. Despite that, the exceptional levels of radiopurity have made it possible for Borexino to accomplish not only its primary goal but also to produce many other interesting results, both within and beyond the Standard Model of particle physics, improving our understanding of the neutrino's features.
\end{abstract}
\section{The Borexino experiment} %OK
The Borexino experiment is located deep underground (3,800 meter water equivalent) in the Hall C of the National Laboratory of Gran Sasso (Assergi, Italy), and measures
solar neutrinos via their interactions with a target of 278 ton organic liquid scintillator. The ultrapure liquid scintillator is contained inside a very thin transparent nylon vessel of 8.5 m diameter.
Solar neutrinos are detected by measuring the energy and position of electrons scattered by the neutrino-electron elastic interactions.
The scintillator promptly converts the kinetic energy of the electrons by emitting photons, which are then detected and converted into electronic signals by 2212 photomultipliers (PMT) mounted on a concentric 13.7 m diameter stainless steel sphere.
The Borexino detector was designed exploiting the principle of graded shielding: an onion-like structure shields the innermost volume from external radiation and from radiation produced in the outer layers. The requirements on material radiopurity increase when moving towards the innermost region of the detector.
Starting from 2007 and throughout the years, the Borexino \cite{ref:BxLong} experiment has been measuring the fluxes of low-energy neutrinos (neutrinos with energy $<3$ MeV), most notably those emitted in nuclear fusion reactions and $\beta$ decays along the pp-chain in the Sun.
\section{The Borexino recent results} %OK
In January 2018, the Borexino collaboration released a detailed paper \cite{ref:BxMC} about the Monte Carlo (MC) simulation of its detector and the agreement of the MC output with the data acquired by the detector. The simulation accounts for absorption, reemission and scattering of the optical photons, tracking them until they either are absorbed or reach the photocathode of one of the photomultiplier tubes. These simulations were, and still are, used to study and reproduce the energy response of the detector, its uniformity within the fiducial scintillator volume relevant to neutrino physics, and the time distribution of detected photons, to better than 1\% between 100 keV and several MeV. This work has been fundamental to all the Borexino analyses performed so far.
In October 2018, the collaboration published in Nature \cite{ref:BxNuSol} the latest solar neutrino result, a comprehensive measurement of all the fluxes of the pp-chain solar neutrinos. This work is a milestone for solar neutrino physics, since it provides the first complete study of the solar pp-chain and of its different terminations in a single detector and with a uniform data analysis procedure. This study confirmed the nuclear origin of the solar power and provided the most complete real-time insight into the core of our Sun so far.
The Borexino analysis is now focused on the possibility of measuring the interaction rate of the rare CNO solar neutrinos.
\section{The Borexino computing at CNAF} %OK
INFN-CNAF currently hosts the whole Borexino data set and the users' area for physics studies.
The Borexino data are classified into three types:
\begin{itemize}
\item {\bf raw data~} Raw data are compressed binary files with a typical size of about 600 MB, corresponding to a data taking time of $\sim$6 h.
\item {\bf ROOT files~~~~} ROOT files contain the Borexino reconstructed events, organized in a {\tt ROOT TTree}: their typical size is $\sim$1 GB (see the illustrative sketch after this list).
\item {\bf DSTs~~~~~~~} DST files contain only a selection of events for the high level analyses.
\end{itemize}
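As a purely illustrative example of how such a reconstructed ROOT file can be opened in a user analysis job, the following Python sketch uses the uproot library; the file name, tree name and branch names are hypothetical placeholders and do not reflect the actual Borexino event format.
\begin{verbatim}
# Illustrative only: open a reconstructed ROOT file and read
# a couple of branches. File, tree and branch names are
# hypothetical placeholders, not the real Borexino format.
import uproot

with uproot.open("run012345_reco.root") as f:
    tree = f["events"]
    data = tree.arrays(["nhits", "charge"], library="np")
    print("number of events:", len(data["charge"]))
\end{verbatim}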
Borexino standard data taking requires a disk space increase of about 10 TB/year, while a complete Monte Carlo simulation of both neutrino signals and backgrounds requires about 8 TB per DAQ year.
The CNAF tape area also hosts a full backup of the Borexino raw data.
Our dedicated front-end machine ({\tt ui-borexino.cr.cnaf.infn.it}) and pledged CPU resources (about 1500 HS06) are used by the Borexino collaboration for ROOT file production, Monte Carlo simulations, and interactive and batch analysis jobs.
Moreover, a few times a year, an extraordinary peak usage (at least 3000 HS06) is needed in order to perform a full reprocessing of the whole data set with updated versions of the reconstruction code and/or a massive Monte Carlo generation.
\section{Conclusions} %OK
Borexino has been, so far, the only experiment able to perform real-time spectroscopy of neutrinos from almost all the nuclear reactions happening in the Sun. Near-future goals are mainly focused on improving its current limit on the CNO neutrino flux and possibly measuring it. While the amount of CPU resources needed is expected to remain quite stable, in the coming years the Borexino collaboration will increase its disk space requests in order to successfully complete its challenging and very rich physics program.
\section*{References}
\begin{thebibliography}{9}
\bibitem{ref:BxLong} Bellini~G. {\it et al.} 2014~{\it Phys. Rev. D} {\bf 89} 112007.
\bibitem{ref:BxMC} Agostini~M. {\it et al.} 2018~{\it Astropart. Phys.} {\bf 97} 136.
\bibitem{ref:BxNuSol} Agostini~M. {\it et al.} 2018~{\it Nature} {\bf 562} 505.
\end{thebibliography}
\end{document}
\documentclass[a4paper]{jpconf}
\usepackage[T1]{fontenc}
\usepackage[utf8]{inputenc}
\usepackage{graphicx}
\usepackage{url}
\begin{document}
\title{DHLab: a digital library for the INFN Cultural Heritage Network}
\author{F. Proietti$^1$, L. dell'Agnello$^1$, F. Giacomini$^1$}
\address{$^1$ INFN-CNAF, Bologna, IT}
\ead{fabio.proietti@cnaf.infn.it}
\begin{abstract}
DHLab, part of the Cultural Heritage Network (CHNet) promoted by INFN, is a cloud-based environment to process, visualise and analyse data acquired by members of the network, to be provided to both technical and non-technical users. DHLab is under development; currently its main features are a cloud service to upload and manage the data, a form to assign metadata to uploaded datasets and a service to analyse data obtained from XRF measurements.
\end{abstract}
\section{Introduction}
CHNet\footnote{http://chnet.infn.it/} is a network composed of several INFN teams who devote their expertise in physics research to the study and diagnostics of Cultural Heritage. By using their existing instruments, developed for Nuclear Physics, or even by building new ones, INFN laboratories started to address the needs of archaeologists, historians, art historians, restorers and conservators. This combined knowledge can provide useful indications about the correct procedures to be applied for restoration or conservation, can help to date an artwork or verify its authenticity, and can be used to study the provenance of raw materials in order to retrace ancient trade routes. In this context the purpose of DHLab is to host all the data acquired by the CHNet laboratories, together with the descriptions and annotations added by humanists.
\section{Architecture}
The infrastructure, shown in figure~\ref{fig:architecture}, follows a cloud-based model and can be divided into multiple modular frontends, which provide the interface towards the clients, and a monolithic backend service.
\begin{figure}[ht]
\begin{center}
\includegraphics[scale=.4]{ArchDiagram.png}
\caption{\label{fig:architecture}High level overview of DHLab
architecture}
\end{center}
\end{figure}
The frontend includes three main blocks: a cloud service, a metadata form and an application service. Of these, the metadata form, used to fill in details about a work or an analysis (see section~\ref{sec:metadata-form}), is usable also offline; this requirement addresses the use case of an operator who, while disconnected from the network, needs to fill in the metadata and save it as a file on the local machine. The same requirement may be at least partly satisfied also for the application services.
On the backend side, which is only partially implemented and not yet fully designed, we currently expect to have a listener, to dispatch client requests; two data stores, one for user profiles and the other for the actual datasets; and a set of auxiliary services, for example to automate the filling of the metadata form and to standardize some of its fields (see again section~\ref{sec:metadata-form}).
The entire system is hosted at the CNAF data center.
\section{Technologies and protocols}
As stated above, the design of the system is not yet complete and we
are still investigating different options to address the challenges we
face.
Open aspects concern:
\begin{itemize}
\item the data model, which must accommodate both datasets (possibly composed of multiple files) and the corresponding metadata, together with a mechanism to link them together;
\item the authentication and authorization model, which should use as
much as possible standard web technologies and have flexible
mechanisms to authenticate users coming from different institutions,
leveraging their own Identity Providers;
\item how to access the available storage from a client, both to
upload datasets and their metadata and subsequently access them.
\end{itemize}
The experimentation makes use of an installation of
NextCloud~\cite{ref:nextcloud}, an open-source suite of client-server
software for creating and using file hosting services, with
functionality often extended through the use of plugins.
Authentication is based on OpenID Connect~\cite{ref:oidc} and makes
use of the INDIGO-IAM~\cite{ref:iam} service, an Identity and Access
Management product developed within the EU-funded
INDIGO-DataCloud~\cite{ref:indigo} project. INDIGO-IAM offers a
service to manage identities, user enrollment, group membership,
attributes and policies to access distributed resources and services
in a homogeneous and interoperable way; hence it represents a perfect
match to manage users, groups and resources of the CHNet
organization. In particular INDIGO-IAM delegates the authentication of
a user to their home institution identity provider under a trust
agreement.
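As a minimal sketch of how a DHLab client could obtain a token from an OpenID Connect provider such as INDIGO-IAM, the following Python fragment reads the standard discovery document and performs an OAuth2 client-credentials request; the issuer URL and the client credentials are hypothetical, and the grant types actually enabled for CHNet may differ.
\begin{verbatim}
# Sketch only: obtain an access token from an OIDC provider.
# Issuer URL, client id and secret are hypothetical.
import requests

ISSUER = "https://iam.example.infn.it"

# OIDC providers publish their endpoints at a well-known URL.
conf = requests.get(
    ISSUER + "/.well-known/openid-configuration").json()

# Client-credentials grant, one of the standard OAuth2 flows.
resp = requests.post(
    conf["token_endpoint"],
    data={"grant_type": "client_credentials", "scope": "openid"},
    auth=("example-client-id", "example-client-secret"),
)
resp.raise_for_status()
token = resp.json()["access_token"]

# The token is then sent as a Bearer credential to the backend.
headers = {"Authorization": "Bearer " + token}
\end{verbatim}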
NextCloud also offers the possibility to access data via the WebDAV
protocol, allowing users to mount the remote storage on their local
machine and see it as if it were a local disk. This feature becomes
useful when interaction through a web browser is not the most
effective tool, for example for batch or bulk operations.
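The fragment below sketches how a dataset could be uploaded, or a folder listed, over WebDAV from a script rather than through a mounted drive; the base URL follows the usual NextCloud WebDAV layout, but the host, user name, credentials and paths are assumptions made only for this example.
\begin{verbatim}
# Sketch only: upload and list files over WebDAV with plain
# HTTP calls. Host, user, credentials and paths are examples.
import requests

BASE = ("https://dhlab.example.infn.it"
        "/remote.php/dav/files/operator01")
AUTH = ("operator01", "app-password")

# PUT uploads (or overwrites) a file at the given remote path.
with open("scan_001.dat", "rb") as payload:
    r = requests.put(BASE + "/xrf/scan_001.dat",
                     data=payload, auth=AUTH)
    r.raise_for_status()

# PROPFIND with Depth: 1 lists the content of a remote folder.
listing = requests.request("PROPFIND", BASE + "/xrf",
                           headers={"Depth": "1"}, auth=AUTH)
print(listing.status_code)  # 207 Multi-Status on success
\end{verbatim}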
\section{Metadata Form}
\label{sec:metadata-form}
The Metadata form is a web application whose purpose is to associate
metadata with art works, measurement campaigns and analysis
results. The application, written in Typescript~\cite{ref:typescript}
and based on the Angular 2 framework~\cite{ref:angular2}, is under
development; the main deployment option foresees its integration into
the cloud platform, but the combination with
Electron~\cite{ref:electron} makes a desktop application a viable
alternative.
As shown in figure~\ref{fig:metadataSchema}, to fill the metadata form
a user can follow two paths: they can create a \textit{campaign} and
associate it with multiple \textit{sessions} and \textit{analyses} or
they can store information about a single \textit{analysis}. In
particular, each \textit{analysis} can be associated with one or more
\textit{datasets}, the studied \textit{object} (i.e.,~an art work) and
all the information about its \textit{type}, \textit{author},
\textit{holder}, \textit{owner}, etc. In addition, users can provide
information about the analysis type, the operator who performed the
analysis, the devices, components and software used to scan, create or
read the resulting dataset. When completed, the resulting form,
translated into a JSON file, can be saved locally or uploaded to the
remote storage.
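To make the structure more concrete, the short Python sketch below builds a metadata record of this kind and saves it as a JSON file; the field names are only indicative of the schema described above and are not the agreed-upon CHNet vocabulary.
\begin{verbatim}
# Indicative example of the metadata produced by the form,
# saved as JSON. Field names are illustrative only.
import json

record = {
    "campaign": {"name": "Example campaign 2018",
                 "institution": "INFN (example)"},
    "sessions": [{
        "analyses": [{
            "type": "XRF",
            "operator": "J. Doe",
            "object": {"title": "Example panel painting",
                       "author": "Unknown",
                       "holder": "Example museum"},
            "datasets": ["scan_001.dat"],
            "devices": ["portable XRF scanner"],
        }]
    }],
}

with open("metadata.json", "w") as out:
    json.dump(record, out, indent=2)
\end{verbatim}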
\begin{figure}[ht]
\begin{center}
\includegraphics[scale=.4]{metadataSchema.png}
\end{center}
\caption{\label{fig:metadataSchema}Schema of the sections included
in the metadata description.}
\end{figure}
\section{Application services}
DHLab is also designed to provide visualization and analysis services
for some of the stored datasets. Currently a proof-of-concept
application is available, to visualize and perform some analysis of
images obtained from XRF scans~\cite{ref:xrf}.
\section{Conclusions}
DHLab is a project born from the need to group, share, catalogue and
reuse data that comes from measurements and analyses of cultural
heritage works. It aims at being flexible and usable by persons
covering different roles: physicists, computer scientists, cultural
heritage operators. The system is designed and deployed around a core cloud-based infrastructure, but some of its parts must also be able to function offline.
A web application is being developed for filling in a form with metadata to be associated with collected datasets according to an agreed-upon schema.
Other web applications are foreseen for the visualization and analysis
of the stored datasets, starting from those coming from XRF,
radiocarbon and thermoluminescence analysis.
\section*{References}
\begin{thebibliography}{9}
\bibitem{ref:nextcloud} NextCloud \url{https://nextcloud.com/}
\bibitem{ref:oidc} OpenId Connect \url{https://openid.net/connect}
\bibitem{ref:iam} A Ceccanti, E Vianello, M Caberletti. (2018,
May). INDIGO Identity and Access Management
(IAM). Zenodo. \url{http://doi.org/10.5281/zenodo.1874790}
\bibitem{ref:indigo} The INDIGO-DataCloud project
\url{https://www.indigo-datacloud.eu/}
\bibitem{ref:typescript} Typescript language
\url{https://www.typescriptlang.org/}
\bibitem{ref:angular2} Angular 2 framework
\url{https://angular.io/}
\bibitem{ref:electron} Electron
\url{https://electronjs.org/}
\bibitem{ref:xrf} Cappelli L, Giacomini F, Taccetti F, Castelli L,
dell'Agnello L. 2016. A web application to analyse XRF scanning data. INFN-CNAF
Annual Report. \url{https://www.cnaf.infn.it/annual-report}
\end{thebibliography}
\end{document}