Chargement des packages
# Charger les packages
import polars as pl
import pypmsi as pm
import datetime
Lecture des données
# Record the start time so the duration of the import can be displayed below.
now = datetime.datetime.now()

# Read the RSA file with pypmsi.
# NOTE(review): the positional arguments are presumably the FINESS number,
# the year, the month and the data directory -- confirm against the
# pypmsi `irsa` signature.
rsa = pm.irsa(
    290000017,
    2020,
    12,
    '~/Documents/data/mco',
    # n_rows = 1,  (optional row limit, left disabled)
    typi=4,
)

# Elapsed wall-clock time in seconds (last expression, shown by the notebook).
elapsed = datetime.datetime.now() - now
elapsed.total_seconds()
La lecture de 130 000 RSA prend environ 4 secondes en découpant les parties variables.
La lecture de la partie fixe uniquement se fait en environ 1 seconde.
La lecture complète avec les parties dites “stream” prend environ 15 secondes.
shape: (7, 85)
describe
nofiness
novrsa
cle_rsa
novrss
noseqta
gpvclass
gpcmd
gptype
gpnum
gpcompx
gpcdretr
rsavclass
rsacmd
rsatype
rsanum
rsacompx
rsacdretr
nbrum
agean
agejr
sexe
echpmsi
prov
moissor
ansor
schpmsi
dest
typesej
duree
cdgeo
cdpostal
poids
agegest
delaireg
nbseance
noghs
...
nbsupchs
nbsupehs
nbacte9615
nbsupreaped
nbsupatpart
nb_rdth
valvaort
genautorsa
ghshorsinno
nbsupcaisson
typrestpo
nbsuprea
nbsupsi
nbsupstf
nbsupsrc
nbsupnn1
nbsupnn2
nbsupnn3
nbsuprep
paslitsp
typmachradio
typedosim
numinno
suppdefcard
conversion_hc
pc_raac
admission_maison_naissance
ell_gradation
surveillance_particuliere
resererve_hosp
rescrit_tarifaire
cat_nb_intervenants
noseqrum
dp
dr
ndas
na
str
str
str
str
str
str
str
str
str
str
str
str
str
str
str
str
str
str
f64
f64
f64
str
str
str
str
str
str
str
str
f64
str
str
f64
f64
f64
f64
str
...
f64
f64
f64
f64
f64
f64
str
str
str
f64
str
f64
f64
f64
f64
f64
f64
f64
f64
str
str
str
str
str
str
str
str
str
str
str
str
str
str
str
str
f64
f64
"count"
"128920"
"128920"
"128920"
"128920"
"128920"
"128920"
"128920"
"128920"
"128920"
"128920"
"128920"
"128920"
"128920"
"128920"
"128920"
"128920"
"128920"
128920.0
128920.0
128920.0
"128920"
"128920"
"128920"
"128920"
"128920"
"128920"
"128920"
"128920"
128920.0
"128920"
"128920"
128920.0
128920.0
128920.0
128920.0
"128920"
...
128920.0
128920.0
128920.0
128920.0
128920.0
128920.0
"128920"
"128920"
"128920"
128920.0
"128920"
128920.0
128920.0
128920.0
128920.0
128920.0
128920.0
128920.0
128920.0
"128920"
"128920"
"128920"
"128920"
"128920"
"128920"
"128920"
"128920"
"128920"
"128920"
"128920"
"128920"
"128920"
"128920"
"128920"
"128920"
128920.0
128920.0
"null_count"
"0"
"0"
"0"
"0"
"0"
"0"
"0"
"0"
"0"
"0"
"0"
"0"
"0"
"0"
"0"
"0"
"0"
0.0
4111.0
124809.0
"0"
"0"
"0"
"0"
"0"
"0"
"0"
"0"
0.0
"0"
"0"
126090.0
123936.0
125049.0
0.0
"0"
...
0.0
0.0
0.0
0.0
0.0
0.0
"0"
"0"
"0"
0.0
"0"
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
"0"
"0"
"0"
"0"
"0"
"0"
"0"
"0"
"0"
"0"
"0"
"0"
"0"
"0"
"0"
"0"
0.0
0.0
"mean"
null
null
null
null
null
null
null
null
null
null
null
null
null
null
null
null
null
1.112232
59.391462
51.064704
null
null
null
null
null
null
null
null
2.379701
null
null
3193.380212
38.630819
221.59907
0.45059
null
...
0.0
0.0
0.002622
0.000303
0.010448
0.005701
null
null
null
0.004491
null
0.071665
0.022177
0.150334
0.044012
0.034153
0.017755
0.017833
0.002599
null
null
null
null
null
null
null
null
null
null
null
null
null
null
null
null
2.881818
3.204654
"std"
null
null
null
null
null
null
null
null
null
null
null
null
null
null
null
null
null
0.454878
21.153909
92.017988
null
null
null
null
null
null
null
null
6.048472
null
null
718.94463
2.868469
83.954285
0.497601
null
...
0.0
0.0
0.1347
0.01739
0.524554
0.077324
null
null
null
0.268389
null
1.165289
0.355574
1.39525
0.732701
0.835859
0.757202
0.730919
0.167758
null
null
null
null
null
null
null
null
null
null
null
null
null
null
null
null
6.300596
8.354556
"min"
"290000017"
"226"
"0000000001"
"119"
"004"
"11"
"01"
"C"
"00"
"1"
"000"
"11"
"01"
"C"
"02"
"1"
"000"
1.0
1.0
0.0
"1"
"0"
""
"01"
"2020"
"0"
""
""
0.0
"01090"
"01090"
104.0
22.0
10.0
0.0
"0022"
...
0.0
0.0
0.0
0.0
0.0
0.0
""
"0"
"0022"
0.0
"0"
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
"0"
""
""
""
"0"
""
"0"
"0"
""
""
""
""
""
"01"
"A010"
""
0.0
0.0
"max"
"290000017"
"226"
"0000128920"
"120"
"006"
"11"
"90"
"Z"
"62"
"Z"
"241"
"11"
"28"
"Z"
"62"
"Z"
"241"
10.0
105.0
363.0
"2"
"8"
"R"
"12"
"2020"
"9"
"7"
"B"
246.0
"9H014"
"99404"
6640.0
42.0
300.0
3.0
"9999"
...
0.0
0.0
33.0
1.0
55.0
3.0
"1"
"0"
"9999"
33.0
"3"
90.0
55.0
119.0
52.0
84.0
117.0
91.0
33.0
"1"
"4"
"4"
""
"1"
"2"
"1"
"0"
"1"
"2"
"2"
""
"C"
"08"
"Z988"
"Z992+0"
214.0
534.0
"median"
null
null
null
null
null
null
null
null
null
null
null
null
null
null
null
null
null
1.0
64.0
0.0
null
null
null
null
null
null
null
null
0.0
null
null
3300.0
39.0
268.0
0.0
null
...
0.0
0.0
0.0
0.0
0.0
0.0
null
null
null
0.0
null
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
null
null
null
null
null
null
null
null
null
null
null
null
null
null
null
null
0.0
1.0
shape: (7, 14)
describe
cle_rsa
delai
cdccam
descri
phase
act
extdoc
modif
rmbtexcep
assonp
nbexec
indval
nseqrum
str
str
f64
str
str
str
str
str
str
str
str
f64
str
str
"count"
"413144"
413144.0
"413144"
"413144"
"413144"
"413144"
"413144"
"413144"
"413144"
"413144"
413144.0
"413144"
"413144"
"null_count"
"0"
1091.0
"0"
"0"
"0"
"0"
"0"
"0"
"0"
"0"
0.0
"0"
"0"
"mean"
null
4.180067
null
null
null
null
null
null
null
null
1.0
null
null
"std"
null
9.975205
null
null
null
null
null
null
null
null
0.0
null
null
"min"
"0000000001"
0.0
"AAFA001"
""
"0"
"1"
""
""
""
""
1.0
"1"
"01"
"max"
"0000128920"
368.0
"ZZQX217"
"48"
"3"
"5"
"7"
"ZF"
"O"
"5"
1.0
"1"
"10"
"median"
null
1.0
null
null
null
null
null
null
null
null
1.0
null
null
shape: (7, 5)
describe
cle_rsa
nseqrum
diag
position
str
str
str
str
f64
"count"
"404224"
"404224"
"404224"
404224.0
"null_count"
"0"
"0"
"0"
0.0
"mean"
null
null
null
2.363118
"std"
null
null
null
1.093825
"min"
"0000000001"
"01"
"A010"
1.0
"max"
"0000128920"
"10"
"Z998"
4.0
"median"
null
null
null
3.0
shape: (7, 18)
describe
cle_rsa
nseqrum
nsequm
nohop1
dpum
drum
igs2
agegestrum
nbdiagas
nbacte
dureesejpart
typaut1
natsupp1
nbsupp1
typaut2
natsupp2
nbsupp2
str
str
str
str
str
str
str
str
str
f64
f64
f64
str
str
f64
str
str
str
"count"
"143389"
"143389"
"143389"
"143389"
"143389"
"143389"
"143389"
"143389"
143389.0
143389.0
143389.0
"143389"
"143389"
143389.0
"143389"
"143389"
"143389"
"null_count"
"0"
"0"
"0"
"0"
"0"
"0"
"0"
"0"
0.0
0.0
0.0
"0"
"0"
0.0
"0"
"0"
"0"
"mean"
null
null
null
null
null
null
null
null
2.591022
2.881281
2.139571
null
null
0.327856
null
null
null
"std"
null
null
null
null
null
null
null
null
4.121288
6.695112
5.026426
null
null
2.228983
null
null
null
"min"
"0000000001"
"01"
"0001"
"290000058"
"A010"
""
"000"
""
0.0
0.0
0.0
"01AC"
"00"
0.0
""
""
""
"max"
"0000128920"
"10"
"0103"
"290004365"
"Z998"
"Z992+0"
"880"
"42"
46.0
530.0
246.0
"73 M"
"13"
117.0
"01AC"
"02"
"0004"
"median"
null
null
null
null
null
null
null
null
1.0
1.0
0.0
null
null
0.0
null
null
null
Case-mix GHM / GHS et DMS
On écrit `to_pandas()` à la fin pour un meilleur affichage dans le notebook Quarto.
# Case-mix by GHM / GHS with mean length of stay (DMS), first 5 rows.
# The result is converted to pandas only for nicer rendering in the notebook.
(
    rsa['rsa']
    # Build the GHM code on the fly by concatenating its four components.
    .with_columns(
        pl.concat_str(['rsacmd', 'rsatype', 'rsanum', 'rsacompx']).alias('ghm')
    )
    .groupby(['ghm', 'noghs'])
    .agg(
        # Mean of `duree` over every stay of the group, 0-night stays included.
        pl.col('duree').mean().round(1).alias('DMS avec 0 nuitée'),
        # Mean of `duree` restricted to stays with duree > 0
        # (null when the group has no such stay).
        pl.col('duree')
        .filter(pl.col('duree') > 0)
        .mean()
        .round(1)
        .alias('DMS 1 nuit et +'),
        # Number of stays in the group.
        pl.col('duree').count().alias('Nb séjours'),
        # Number of stays with duree == 0: summing a boolean counts the True.
        (pl.col('duree') == 0).sum().alias('Nb séjours 0 nuitée'),
    )
    # Group order is not guaranteed, and one GHM can map to several GHS:
    # sort on both keys so that head(5) is reproducible from run to run.
    .sort(['ghm', 'noghs'])
    .head(5)
    .to_pandas()
)
ghm
noghs
DMS avec 0 nuitée
DMS 1 nuit et +
Nb séjours
Nb séjours 0 nuitée
0
01C031
0022
3.9
4.2
14
1
1
01C032
0023
7.5
7.5
33
0
2
01C033
0024
14.3
14.3
28
0
3
01C034
0025
31.4
31.4
16
0
4
01C041
0026
4.6
4.7
107
2
Requête actes / diags
On cherche à dénombrer rapidement les prises en charge avec un acte CCAM commençant par HFCC et à les ventiler par diagnostic principal du séjour.
On écrit `to_pandas()` à la fin pour un meilleur affichage dans le notebook Quarto.
# Count the stays carrying a CCAM act whose code starts with 'HFCC', broken
# down by the stay's principal diagnosis (dp) and by act code; top 5 rows.
(
    rsa['actes']
    # Prefix match: the request is "codes starting with HFCC", so use
    # starts_with rather than an unanchored regex search, and keep only
    # rows whose `act` value is '1'.
    .filter(
        pl.col('cdccam').str.starts_with('HFCC') & (pl.col('act') == '1')
    )
    # One row per (stay, act code). Without this, a stay where the same act
    # appears on several rows would be counted several times in nb_rsa and
    # over-weighted in the DMS.
    .select(['cle_rsa', 'cdccam'])
    .unique()
    # Bring in the length of stay and the principal diagnosis.
    # NOTE(review): assumes cle_rsa identifies a single row of rsa['rsa'].
    .join(
        rsa['rsa'].select(['cle_rsa', 'duree', 'dp']),
        how='inner',
        on='cle_rsa',
    )
    .groupby('dp', 'cdccam')
    .agg(
        pl.col('cle_rsa').count().alias('nb_rsa'),
        pl.col('duree').mean().round(1).alias('DMS'),
    )
    # Largest counts first; dp and cdccam break ties so that the top 5 is
    # reproducible (group order is not guaranteed).
    .sort(['nb_rsa', 'dp', 'cdccam'], descending=[True, False, False])
    .head(5)
    .to_pandas()
)
dp
cdccam
nb_rsa
DMS
0
E6696
HFCC003
16
4.9
1
E6695
HFCC003
5
3.0
2
K265
HFCC001
4
7.8
3
E6606
HFCC003
1
4.0
4
K918
HFCC001
1
8.0
Compter les erreurs de la fonction de groupage
# Columns can be selected with a regular expression: '^rsa.*$' keeps every
# column whose name starts with "rsa", which includes rsacdretr, the column
# we are looking for. `duree` is added explicitly; only 5 rows are shown.
rsa['rsa'].select(pl.col('^rsa.*$'), 'duree').head(5).to_pandas()
rsavclass
rsacmd
rsatype
rsanum
rsacompx
rsacdretr
duree
0
11
04
M
13
4
000
202
1
11
15
M
05
D
154
206
2
11
11
K
02
4
000
128
3
11
05
C
12
4
000
119
4
11
23
M
06
2
000
246
# Count stays per grouper return code (rsacdretr), excluding code '000',
# and show the 5 most frequent codes.
# NOTE(review): '000' is presumably the "no error" return code -- confirm.
(
    rsa['rsa']
    # Exact comparison instead of a substring/regex search: only the code
    # '000' itself is excluded, and filtering before grouping avoids
    # aggregating rows that are thrown away afterwards.
    .filter(pl.col('rsacdretr') != '000')
    .groupby('rsacdretr')
    .agg(pl.col('duree').count().alias('nb_rsa'))
    # Most frequent codes first; the code itself breaks ties so that the
    # top 5 is reproducible (group order is not guaranteed).
    .sort(['nb_rsa', 'rsacdretr'], descending=[True, False])
    .head(5)
    .to_pandas()
)
rsacdretr
nb_rsa
0
170
1140
1
154
756
2
223
220
3
152
176
4
112
120