-
Notifications
You must be signed in to change notification settings - Fork 9
/
Copy pathtruncate-data-commit-history.txt
129 lines (89 loc) · 3.68 KB
/
truncate-data-commit-history.txt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
# Manual runbook for shrinking the repo: (1) push a backup copy of the full
# history to a separate GitHub repo, (2) strip the data directories from the
# history of the original repo, (3) re-sync the regular working copy.
# The commands mix Unix-style tools (cp, sed, wget, head) and Windows-style
# ones (drive letters, move, copy), so they are meant to be pasted by hand.
echo NOTE: manually run the lines below one by one, and perform the manual tasks
echo this is not fully automated!!!
# stop here if this file is executed as a script; nothing below runs automatically
exit 0
#
# 0. back up the original repo first!!!
#
#
# 1. Backup as new GitHub repo
#
# manual task: create a new (empty) repo at GitHub.com with this name:
https://github.com/entorb/COVID-19-Coronavirus-German-Regions-OLD-DATA-BACKUP-230831
# clone the original repo into a new working dir (drive f:, folder covid)
f:
mkdir covid
cd covid
# fetch the git-filter-repo script; it is run via python3 below
wget https://raw.githubusercontent.com/newren/git-filter-repo/main/git-filter-repo
git clone git@github.com:entorb/COVID-19-Coronavirus-German-Regions.git
# work on a copy named like the backup repo, the fresh clone stays untouched
cp -r COVID-19-Coronavirus-German-Regions COVID-19-Coronavirus-German-Regions-OLD-DATA-BACKUP-230831
# keep a copy of the original .git/config; it is restored after filtering
# (git-filter-repo is documented to remove the "origin" remote - verify)
cp COVID-19-Coronavirus-German-Regions/.git/config config
cd COVID-19-Coronavirus-German-Regions-OLD-DATA-BACKUP-230831
# remove the git workflows (.github) and maps/out from the entire history:
# --invert-paths drops the listed paths instead of keeping them,
# --prune-empty always also drops commits that become empty
python3 ../git-filter-repo --prune-empty always --invert-paths --path .github --path maps/out
# restore the saved config and rewrite the repo name in it, so that the
# remote points to the new backup repo instead of the original one
cp ../config .git/config
sed -i -e 's/COVID-19-Coronavirus-German-Regions/COVID-19-Coronavirus-German-Regions-OLD-DATA-BACKUP-230831/g' .git/config
# force-push the filtered history to the backup repo
git push -f
#
# 1.2 in case the repo update is > 2GB, the commits need to be pushed in smaller chunks
#
# error message seen in that case:
# remote: fatal: pack exceeds maximum allowed size (2.00 GiB)
# get id of initial commit
git log --pretty=oneline --reverse | head -1
# output of the command above (hash of the initial commit):
5265fb3292c3d034ebc53a88c067431c3a52a53e
# push initial commit; this creates the branch master on the remote
git push origin 5265fb3292c3d034ebc53a88c067431c3a52a53e:refs/heads/master
# write the full history, oldest commit first, to a file for picking hashes
git log --pretty=oneline --reverse >../git-history.txt
# push other commits from git-history.txt, 250 per run;
# each push uploads the history up to the given commit to the remote master
git push origin 03d35afab4df784b6b70c5483bbd2ea2eb4909e2:master
git push origin dbeb9c0679edbd34311c9e2d10f493e36933089d:master
git push origin 5fc27cc71f4da443f5344849b52c253d9f35af7f:master
git push origin ed90dfdd33fce0f922e99e987ebb8690bd895e39:master
# final plain push for the commits after the last hash above
git push
# if the push target is missing: copy .git/config from the original repo (and adjust the path, see above)
# manual task: check the new repo (are the files up to date?)
https://github.com/entorb/COVID-19-Coronavirus-German-Regions-OLD-DATA-BACKUP-230831
# done
#
# 2. cleanup original data by deleting data file changes from git commit history
#
# clone the original repo into a new dir
# or use the copy from above
cd ..
# temporary parking place for the data directories while the history is rewritten
mkdir data-cleanup-backup-dirs
git clone git@github.com:entorb/COVID-19-Coronavirus-German-Regions.git
# not using --bare, since I want to remove the current files
cd COVID-19-Coronavirus-German-Regions
# move the data directories out of the working tree (Windows "move" syntax)
move cache ..\data-cleanup-backup-dirs\
move data ..\data-cleanup-backup-dirs\
move draft ..\data-cleanup-backup-dirs\
move maps ..\data-cleanup-backup-dirs\
move old ..\data-cleanup-backup-dirs\
move plots-gnuplot ..\data-cleanup-backup-dirs\
move plots-python ..\data-cleanup-backup-dirs\
move plots-Excel ..\data-cleanup-backup-dirs\
# commit the removal of the data directories
git add .
git commit -m "remove data files before cleanup of git history"
# drop the same eight directories from the entire history
# (--invert-paths removes the listed paths, --prune-empty always drops commits left empty);
# --force is passed here, unlike in step 1 - presumably because the clone is
# no longer fresh after the commit above (verify against git-filter-repo docs)
python3 ../git-filter-repo --force --prune-empty always --invert-paths --path cache --path data --path draft --path maps --path old --path plots-gnuplot --path plots-python --path plots-Excel
# restore dirs and files
move ..\data-cleanup-backup-dirs\cache .\
move ..\data-cleanup-backup-dirs\data .\
move ..\data-cleanup-backup-dirs\draft .\
move ..\data-cleanup-backup-dirs\maps .\
move ..\data-cleanup-backup-dirs\old .\
move ..\data-cleanup-backup-dirs\plots-gnuplot .\
move ..\data-cleanup-backup-dirs\plots-python .\
move ..\data-cleanup-backup-dirs\plots-Excel .\
# re-add the current data files as one new commit on top of the cleaned history
git add .
# mark the .sh files one directory level down as executable in the index
git update-index --chmod=+x */*.sh
git commit -m "restoring data files after cleanup"
# restore the .git/config saved in step 1 (unmodified, so it still points to the original repo)
# NOTE(review): step 1 uses "cp" for the same action; "copy" with forward
# slashes may not work in every Windows shell - verify before running
copy ../config .git/config
# force-push the rewritten history to the original repo
git push -f
# 3. sync the regular working copy (D:\GitHub\...) with the rewritten remote history
D:
cd \GitHub\COVID-19-Coronavirus-German-Regions
git fetch origin
# discard the local history and changes in favor of the fetched origin/main
# NOTE(review): the backup pushes in 1.2 target "master" - confirm that the
# original repo really uses "main" as branch name
git reset --hard origin/main
# cleanup via git garbage collector: expire all reflog entries, then prune
# unreachable objects immediately so the old history is removed locally
git reflog expire --expire=now --all
git gc --prune=now --aggressive