Test pypmsi with Quarto

Chargement des packages

# Charger les packages
import polars as pl
import pypmsi as pm
import datetime

Lecture des données

# Record the wall-clock time just before the import so it can be timed.
now = datetime.datetime.now()

# Read the RSA data with pypmsi. The positional arguments are passed through
# unchanged; they presumably are the FINESS number, year, month and data
# directory -- confirm against the pypmsi documentation.
rsa = pm.irsa(
    290000017,
    2020,
    12,
    '~/Documents/data/mco',
    # n_rows = 1,
    typi=4,
)

# Elapsed time in seconds; left as the last expression so the notebook
# displays it.
elapsed = datetime.datetime.now() - now
elapsed.total_seconds()
3.935305

La lecture de 130 000 RSA prend environ 4 secondes en découpant les parties variables.

La lecture de la partie fixe uniquement se fait en environ 1 seconde.

La lecture complète avec les parties dites “stream” prend environ 15 secondes.

# Summary statistics for the 'rsa' table of the object returned by pm.irsa.
rsa['rsa'].describe()
shape: (7, 85)
describe nofiness novrsa cle_rsa novrss noseqta gpvclass gpcmd gptype gpnum gpcompx gpcdretr rsavclass rsacmd rsatype rsanum rsacompx rsacdretr nbrum agean agejr sexe echpmsi prov moissor ansor schpmsi dest typesej duree cdgeo cdpostal poids agegest delaireg nbseance noghs ... nbsupchs nbsupehs nbacte9615 nbsupreaped nbsupatpart nb_rdth valvaort genautorsa ghshorsinno nbsupcaisson typrestpo nbsuprea nbsupsi nbsupstf nbsupsrc nbsupnn1 nbsupnn2 nbsupnn3 nbsuprep paslitsp typmachradio typedosim numinno suppdefcard conversion_hc pc_raac admission_maison_naissance ell_gradation surveillance_particuliere resererve_hosp rescrit_tarifaire cat_nb_intervenants noseqrum dp dr ndas na
str str str str str str str str str str str str str str str str str str f64 f64 f64 str str str str str str str str f64 str str f64 f64 f64 f64 str ... f64 f64 f64 f64 f64 f64 str str str f64 str f64 f64 f64 f64 f64 f64 f64 f64 str str str str str str str str str str str str str str str str f64 f64
"count" "128920" "128920" "128920" "128920" "128920" "128920" "128920" "128920" "128920" "128920" "128920" "128920" "128920" "128920" "128920" "128920" "128920" 128920.0 128920.0 128920.0 "128920" "128920" "128920" "128920" "128920" "128920" "128920" "128920" 128920.0 "128920" "128920" 128920.0 128920.0 128920.0 128920.0 "128920" ... 128920.0 128920.0 128920.0 128920.0 128920.0 128920.0 "128920" "128920" "128920" 128920.0 "128920" 128920.0 128920.0 128920.0 128920.0 128920.0 128920.0 128920.0 128920.0 "128920" "128920" "128920" "128920" "128920" "128920" "128920" "128920" "128920" "128920" "128920" "128920" "128920" "128920" "128920" "128920" 128920.0 128920.0
"null_count" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" 0.0 4111.0 124809.0 "0" "0" "0" "0" "0" "0" "0" "0" 0.0 "0" "0" 126090.0 123936.0 125049.0 0.0 "0" ... 0.0 0.0 0.0 0.0 0.0 0.0 "0" "0" "0" 0.0 "0" 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" 0.0 0.0
"mean" null null null null null null null null null null null null null null null null null 1.112232 59.391462 51.064704 null null null null null null null null 2.379701 null null 3193.380212 38.630819 221.59907 0.45059 null ... 0.0 0.0 0.002622 0.000303 0.010448 0.005701 null null null 0.004491 null 0.071665 0.022177 0.150334 0.044012 0.034153 0.017755 0.017833 0.002599 null null null null null null null null null null null null null null null null 2.881818 3.204654
"std" null null null null null null null null null null null null null null null null null 0.454878 21.153909 92.017988 null null null null null null null null 6.048472 null null 718.94463 2.868469 83.954285 0.497601 null ... 0.0 0.0 0.1347 0.01739 0.524554 0.077324 null null null 0.268389 null 1.165289 0.355574 1.39525 0.732701 0.835859 0.757202 0.730919 0.167758 null null null null null null null null null null null null null null null null 6.300596 8.354556
"min" "290000017" "226" "0000000001" "119" "004" "11" "01" "C" "00" "1" "000" "11" "01" "C" "02" "1" "000" 1.0 1.0 0.0 "1" "0" "" "01" "2020" "0" "" "" 0.0 "01090" "01090" 104.0 22.0 10.0 0.0 "0022" ... 0.0 0.0 0.0 0.0 0.0 0.0 "" "0" "0022" 0.0 "0" 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 "0" "" "" "" "0" "" "0" "0" "" "" "" "" "" "01" "A010" "" 0.0 0.0
"max" "290000017" "226" "0000128920" "120" "006" "11" "90" "Z" "62" "Z" "241" "11" "28" "Z" "62" "Z" "241" 10.0 105.0 363.0 "2" "8" "R" "12" "2020" "9" "7" "B" 246.0 "9H014" "99404" 6640.0 42.0 300.0 3.0 "9999" ... 0.0 0.0 33.0 1.0 55.0 3.0 "1" "0" "9999" 33.0 "3" 90.0 55.0 119.0 52.0 84.0 117.0 91.0 33.0 "1" "4" "4" "" "1" "2" "1" "0" "1" "2" "2" "" "C" "08" "Z988" "Z992+0" 214.0 534.0
"median" null null null null null null null null null null null null null null null null null 1.0 64.0 0.0 null null null null null null null null 0.0 null null 3300.0 39.0 268.0 0.0 null ... 0.0 0.0 0.0 0.0 0.0 0.0 null null null 0.0 null 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 null null null null null null null null null null null null null null null null 0.0 1.0
# Summary statistics for the 'actes' table of the object returned by pm.irsa.
rsa['actes'].describe()
shape: (7, 14)
describe cle_rsa delai cdccam descri phase act extdoc modif rmbtexcep assonp nbexec indval nseqrum
str str f64 str str str str str str str str f64 str str
"count" "413144" 413144.0 "413144" "413144" "413144" "413144" "413144" "413144" "413144" "413144" 413144.0 "413144" "413144"
"null_count" "0" 1091.0 "0" "0" "0" "0" "0" "0" "0" "0" 0.0 "0" "0"
"mean" null 4.180067 null null null null null null null null 1.0 null null
"std" null 9.975205 null null null null null null null null 0.0 null null
"min" "0000000001" 0.0 "AAFA001" "" "0" "1" "" "" "" "" 1.0 "1" "01"
"max" "0000128920" 368.0 "ZZQX217" "48" "3" "5" "7" "ZF" "O" "5" 1.0 "1" "10"
"median" null 1.0 null null null null null null null null 1.0 null null
# Summary statistics for the 'diags' table of the object returned by pm.irsa.
rsa['diags'].describe()
shape: (7, 5)
describe cle_rsa nseqrum diag position
str str str str f64
"count" "404224" "404224" "404224" 404224.0
"null_count" "0" "0" "0" 0.0
"mean" null null null 2.363118
"std" null null null 1.093825
"min" "0000000001" "01" "A010" 1.0
"max" "0000128920" "10" "Z998" 4.0
"median" null null null 3.0
# Summary statistics for the 'rsa_um' table of the object returned by pm.irsa.
rsa['rsa_um'].describe()
shape: (7, 18)
describe cle_rsa nseqrum nsequm nohop1 dpum drum igs2 agegestrum nbdiagas nbacte dureesejpart typaut1 natsupp1 nbsupp1 typaut2 natsupp2 nbsupp2
str str str str str str str str str f64 f64 f64 str str f64 str str str
"count" "143389" "143389" "143389" "143389" "143389" "143389" "143389" "143389" 143389.0 143389.0 143389.0 "143389" "143389" 143389.0 "143389" "143389" "143389"
"null_count" "0" "0" "0" "0" "0" "0" "0" "0" 0.0 0.0 0.0 "0" "0" 0.0 "0" "0" "0"
"mean" null null null null null null null null 2.591022 2.881281 2.139571 null null 0.327856 null null null
"std" null null null null null null null null 4.121288 6.695112 5.026426 null null 2.228983 null null null
"min" "0000000001" "01" "0001" "290000058" "A010" "" "000" "" 0.0 0.0 0.0 "01AC" "00" 0.0 "" "" ""
"max" "0000128920" "10" "0103" "290004365" "Z998" "Z992+0" "880" "42" 46.0 530.0 246.0 "73 M" "13" 117.0 "01AC" "02" "0004"
"median" null null null null null null null null 1.0 1.0 0.0 null null 0.0 null null null

Case-mix GHM / GHS et DMS

On écrit to_pandas() à la fin pour un meilleur affichage dans le notebook Quarto.

# Case-mix by GHM / GHS: mean length of stay (with and without 0-night
# stays), number of stays and number of 0-night stays, first five groups.
(
    rsa['rsa']
    # Helper columns added on the fly: the GHM code (concatenation of its four
    # parts), the length of stay with 0-night stays set to null, and a boolean
    # flag marking 0-night stays.
    .with_columns(
        pl.concat_str(['rsacmd', 'rsatype', 'rsanum', 'rsacompx']).alias('ghm'),
        pl.when(pl.col('duree') > 0).then(pl.col('duree')).otherwise(None).alias('duree1'),
        pl.when(pl.col('duree') == 0).then(True).otherwise(False).alias('flag0'),
    )
    .groupby(['ghm', 'noghs'])
    .agg(
        pl.col('duree').mean().round(1).alias('DMS avec 0 nuitée'),
        # Nulls in duree1 (0-night stays) are ignored by the mean.
        pl.col('duree1').mean().round(1).alias('DMS 1 nuit et +'),
        pl.col('duree').count().alias('Nb séjours'),
        pl.col('flag0').sum().alias('Nb séjours 0 nuitée'),
    )
    # Groups are keyed on (ghm, noghs) and groupby does not guarantee group
    # order, so sorting on ghm alone would leave rows sharing a GHM in an
    # arbitrary order. Sorting on both keys makes head(5) reproducible.
    .sort(['ghm', 'noghs'])
    .head(5)
    # to_pandas() is only there for nicer rendering in the notebook.
    .to_pandas()
)
ghm noghs DMS avec 0 nuitée DMS 1 nuit et + Nb séjours Nb séjours 0 nuitée
0 01C031 0022 3.9 4.2 14 1
1 01C032 0023 7.5 7.5 33 0
2 01C033 0024 14.3 14.3 28 0
3 01C034 0025 31.4 31.4 16 0
4 01C041 0026 4.6 4.7 107 2

Requête actes / diags

On cherche à dénombrer rapidement les prises en charge avec un acte CCAM commençant par HFCC et les ventiler par diagnostic principal du séjour.

On écrit to_pandas() à la fin pour un meilleur affichage dans le notebook Quarto.

# Count act lines whose CCAM code starts with 'HFCC', broken down by the
# stay's principal diagnosis (dp) and by act code, with the mean length of
# stay; the five most frequent combinations are shown.
(
    rsa['actes']
    # Literal prefix test, matching the stated intent ("starting with HFCC").
    # str.contains would interpret the pattern as a regex and match it
    # anywhere in the code.
    .filter(pl.col('cdccam').str.starts_with('HFCC'))
    # Keep only lines with act == '1' (presumably CCAM activity 1 -- confirm).
    .filter(pl.col('act') == '1')
    # Bring in the stay-level length of stay and principal diagnosis.
    .join(rsa['rsa'].select(['cle_rsa', 'duree', 'dp']), how='inner', on='cle_rsa')
    .groupby('dp', 'cdccam')
    # NOTE(review): each matching act line contributes one row, so a stay with
    # several matching lines is counted several times in nb_rsa and weighs
    # more in DMS. Deduplicate on cle_rsa first if one row per stay is wanted.
    .agg(
        pl.col('duree').count().alias('nb_rsa'),
        pl.col('duree').mean().round(1).alias('DMS'),
    )
    .sort(pl.col('nb_rsa'), descending=True)
    .head(5)
    # to_pandas() is only there for nicer rendering in the notebook.
    .to_pandas()
)
dp cdccam nb_rsa DMS
0 E6696 HFCC003 16 4.9
1 E6695 HFCC003 5 3.0
2 K265 HFCC001 4 7.8
3 E6606 HFCC003 1 4.0
4 K918 HFCC001 1 8.0

Compter les erreurs de la fonction de groupage

# Select columns with a regular expression: every column whose name starts
# with 'rsa', plus 'duree'. Among them is rsacdretr, the column we are
# looking for.
(
    rsa['rsa']
    .select(
        pl.col('^rsa.*$'),
        'duree',
    )
    .head(5)
    .to_pandas()
)
rsavclass rsacmd rsatype rsanum rsacompx rsacdretr duree
0 11 04 M 13 4 000 202
1 11 15 M 05 D 154 206
2 11 11 K 02 4 000 128
3 11 05 C 12 4 000 119
4 11 23 M 06 2 000 246
# Count stays per grouping return code (rsacdretr), excluding code '000'
# (presumably "no error" -- confirm), and show the five most frequent codes.
(
    rsa['rsa']
    # The aggregation only needs rsacdretr and duree, so no prior column
    # selection is required.
    .groupby('rsacdretr')
    .agg(pl.col('duree').count().alias('nb_rsa'))
    # Exact comparison: str.contains('000') is a regex substring search and
    # would also drop any code that merely contains '000'.
    .filter(pl.col('rsacdretr') != '000')
    .sort('nb_rsa', descending=True)
    .head(5)
    # to_pandas() is only there for nicer rendering in the notebook.
    .to_pandas()
)
rsacdretr nb_rsa
0 170 1140
1 154 756
2 223 220
3 152 176
4 112 120