Skip to content

Commit

Permalink
feat: github crawl integrate DB
Browse files Browse the repository at this point in the history
commit
  • Loading branch information
vtlinh02 committed Jul 31, 2024
1 parent d2ed32e commit a2d6974
Show file tree
Hide file tree
Showing 47 changed files with 10,916 additions and 12,078 deletions.
52 changes: 52 additions & 0 deletions .github/workflows/node.js.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
# This workflow will do a clean installation of node dependencies, cache/restore them, build the source code and run tests across different versions of node
# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-nodejs

# Two-job workflow: `handle-commit` captures the stdout of `npm run crawl` as a job
# output, and `send-rest-api` passes that captured value to curl.
name: Node.js CI

# Triggered by pushes to the `demo` branch only.
on:
push:
branches:
- demo
jobs:
# Job 1: install dependencies, run the crawl script and publish its stdout as `output`.
handle-commit:
runs-on: ubuntu-latest
strategy:
matrix:
node-version: [18.x]
# See supported Node.js release schedule at https://nodejs.org/en/about/releases/
outputs:
output: ${{steps.handling.outputs.output}}
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Use Node.js ${{ matrix.node-version }}
uses: actions/setup-node@v3
with:
node-version: ${{ matrix.node-version }}
- name: Install dependency
run: |
npm install -g pnpm
pnpm install
npm install -g ts-node
# NOTE(review): this step is guarded by `github.event.pull_request.merged`, but the
# workflow is triggered by `push`; presumably that field is not set for push events,
# so the step would be skipped — confirm the intended trigger.
- name: test program working?
run: npm run crawl --silent
if: github.event.pull_request.merged == true
# Whatever the crawl script prints to stdout becomes the `output` value, so any extra
# lines printed by the script end up in it as well.
- name: run commit handling
id: handling
run: |
output=$(npm run crawl --silent)
echo "output=$output" >> "$GITHUB_OUTPUT"
# Job 2: echo the value produced by `handle-commit` and request it with curl.
send-rest-api:
needs: handle-commit
runs-on: ubuntu-latest
steps:
# NOTE(review): same `pull_request.merged` guard as above on a push-triggered
# workflow — confirm this step can actually run.
- name: Output of previous step
env:
output: ${{needs.handle-commit.outputs.output}}
run: |
echo $output
echo "$output"
curl "$output"
if: github.event.pull_request.merged == true
4 changes: 3 additions & 1 deletion apps/server/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,9 @@
"dev": "nodemon -x pnpm start:watch",
"start:watch": "ts-node -r tsconfig-paths/register src/index.ts",
"start": "node -r ./tsconfig-paths.js dist/index.js",
"format": "prettier --write \"**/*.{ts,tsx,md}\""
"format": "prettier --write \"**/*.{ts,tsx,md}\"",
"crawl": "ts-node -r tsconfig-paths/register src/crawl.ts",
"test": "ts-node -r tsconfig-paths/register src/test.ts"
},
"keywords": [],
"author": "",
Expand Down
2 changes: 1 addition & 1 deletion apps/server/src/configs/database.ts
Original file line number Diff line number Diff line change
Expand Up @@ -35,5 +35,5 @@ export const DatabaseConfig: DataSourceOptions = {
ProjectDescriptions,
],
synchronize: true,
logging: true,
logging: false,
}
32 changes: 32 additions & 0 deletions apps/server/src/controllers/base.controller.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
import { Request, Response } from 'express'
import 'dotenv/config'
import { creatorParentCategory } from '@/creator/creatorParentCategory'
import { fsWrapper } from '@/utils/fs/fsWrapper'
import { creatorSocial } from '@/creator/creatorSocial'

/**
 * Calls `creatorParentCategory` once per folder, using the path
 * `/projects/<folder>`.
 *
 * NOTE(review): the return value of `creatorParentCategory` is ignored, as in
 * the previous implementation; if it is asynchronous, its completion and
 * failures are not observed here — confirm that is intended.
 *
 * @param parentFolders - Folder names located directly under `/projects`.
 */
function createCategory(parentFolders: readonly string[]): void {
  // for...of rather than map(): the calls are made purely for their side
  // effects, so there is no result array worth building.
  for (const folder of parentFolders) {
    creatorParentCategory(`/projects/${folder}`)
  }
}

/**
 * Express handlers that seed the database from the contents of `/projects`.
 */
export class BaseController {
  /**
   * Runs `creatorSocial`, lists `/projects`, and hands every entry except
   * `projects.json` and `socials.json` to `createCategory`.
   *
   * Responds with `successful` on success. Any failure is logged and answered
   * with HTTP 500 so the request never hangs on a rejected promise.
   */
  public buildBaseDatabase = async (req: Request, res: Response) => {
    try {
      await creatorSocial()
      const rootEntries = await fsWrapper.readdir(`/projects`)
      const parentFolders = rootEntries.filter(
        (value) => value !== 'projects.json' && value !== 'socials.json',
      )

      createCategory(parentFolders)

      res.send('successful')
    } catch (error: unknown) {
      console.error('buildBaseDatabase failed', error)
      res.status(500).send('failed')
    }
  }

  /** Smoke-test endpoint: logs a marker line and responds with `successful`. */
  public test = async (req: Request, res: Response) => {
    console.log('come here')

    res.send('successful')
  }
}
17 changes: 17 additions & 0 deletions apps/server/src/controllers/crawl.controller.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
import { CrawlService } from '@/services/crawl.service'
import { Request, Response } from 'express'

/**
 * Express controller that forwards the changed-file list received in the
 * `dataRaw` query parameter to `CrawlService`.
 */
export class CrawlController {
  constructor(private crawlService = new CrawlService()) {}

  /**
   * Normalizes a raw query value into a list of strings.
   *
   * A missing parameter becomes an empty list, a single value becomes a
   * one-element list, and non-string entries (e.g. nested query objects) are
   * dropped.
   */
  private static toFileList(raw: unknown): Array<string> {
    const candidates: Array<unknown> = Array.isArray(raw)
      ? raw
      : raw === undefined
        ? []
        : [raw]

    return candidates.filter(
      (value): value is string => typeof value === 'string',
    )
  }

  /**
   * Reads `dataRaw` from the query string and, when it contains at least one
   * path, passes the list to `crawlService.crawl`. Always responds with
   * `successful`.
   */
  public crawl = async (req: Request, res: Response) => {
    // The query parser may yield undefined, a string or an array depending on
    // how the client encoded the parameter, so normalize instead of casting.
    const fileChanges = CrawlController.toFileList(req.query.dataRaw)

    console.log(fileChanges)

    if (fileChanges.length !== 0) this.crawlService.crawl(fileChanges)

    res.send('successful')
  }
}
33 changes: 33 additions & 0 deletions apps/server/src/crawl.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
import { getListFileChange } from './crawl/getListFileChange'

/**
 * Serializes a list of changed file paths into a query string of repeated
 * `dataRaw[]=<path>` pairs.
 *
 * Each path is percent-encoded so characters such as spaces, `&` or `#` cannot
 * break the query; `/` is kept literal so ordinary paths stay readable and
 * produce the same output as before.
 *
 * @param fileChanges - File paths to serialize.
 * @returns `''` for an empty list, otherwise a string starting with `?`.
 */
export function fromFileChangesToQuery(fileChanges: Array<string>): string {
  if (fileChanges.length === 0) return ''

  const pairs = fileChanges.map((file) => {
    const encoded = encodeURIComponent(file).replace(/%2F/g, '/')
    return `dataRaw[]=${encoded}`
  })

  return `?${pairs.join('&')}`
}

/**
 * Builds the crawl endpoint URL.
 *
 * @param data - Query string to append (including the leading `?`), or `''`.
 * @param server - Base URL of the server; defaults to the previously
 *   hard-coded address so existing callers are unaffected.
 * @returns `<server>/api/v1/crawl<data>`.
 */
export function getURL(
  data: string,
  server: string = 'https://96cc-42-119-180-122.ngrok-free.app',
): string {
  return `${server}/api/v1/crawl${data}`
}

/**
 * Script entry point: collects the changed files, serializes them into a
 * query string and prints the resulting crawl URL on stdout.
 */
async function main(): Promise<void> {
  const fileChanges = await getListFileChange()

  const query = fromFileChangesToQuery(fileChanges)

  // The URL is the only thing this function writes to stdout.
  console.log(getURL(query))
}

// Handle rejection explicitly: report on stderr and signal failure through the
// exit code instead of leaving a floating promise.
main().catch((error: unknown) => {
  console.error(error)
  process.exitCode = 1
})
23 changes: 23 additions & 0 deletions apps/server/src/crawl/classificationCase.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
import 'dotenv/config'
import { filterNewParent } from './utils/filterNewParent'
import { filterNewSub } from './utils/filterNewSub'
import { filterNewProject } from './utils/filterNewProject'

/**
 * Result of classifying a set of changed files: the paths that must be
 * created at each level.
 *
 * Every list starts empty, so consumers can safely read `.length` or iterate
 * even when a classification step assigned nothing.
 */
export class DataReturn {
  /** Paths of parent categories. */
  parentCategory: Array<string> = []
  /** Paths of sub categories. */
  subCategory: Array<string> = []
  /** Paths of projects. */
  project: Array<string> = []
}

/**
 * Classifies changed file paths by running them through three filters that
 * each record their findings on a shared `DataReturn`.
 *
 * The list returned by `filterNewParent` is fed to `filterNewSub`, whose
 * result is in turn fed to `filterNewProject`.
 *
 * @param fileChange - Paths of the files that changed.
 * @returns The `DataReturn` populated by the filters.
 */
export async function classificationCase(
  fileChange: Array<string>,
): Promise<DataReturn> {
  const result = new DataReturn()

  // Layer 1 -> layer 2: each stage narrows the list handed to the next one.
  const afterParentFilter = await filterNewParent(result, fileChange)
  const afterSubFilter = await filterNewSub(result, afterParentFilter)

  // The last stage is invoked without awaiting, matching its use as a plain call.
  filterNewProject(result, afterSubFilter)

  return result
}
45 changes: 45 additions & 0 deletions apps/server/src/crawl/getListFileChange.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
// Get the IDs of the two most recent commits from `git log --oneline`,

// then run `git diff` between them and parse its output.

import { exec } from 'child_process'

/**
 * Extracts the IDs of the two most recent commits from `git log --oneline`
 * style output (one `<id> <subject>` entry per line, newest first).
 *
 * @param output - Raw text to parse.
 * @returns `recent` (first line's ID) and `past` (second line's ID).
 * @throws Error when the output contains fewer than two commit lines, since
 *   no diff range can be formed.
 */
function getIDsMergeAction(output: string) {
  const ids = output
    .split('\n')
    .map((line) => line.trim())
    .filter((line) => line !== '')
    .map((line) => line.split(' ')[0])

  if (ids.length < 2) {
    throw new Error(
      'git log returned fewer than two commits; cannot compute a diff range',
    )
  }

  const [recent, past] = ids

  // Diagnostics go to stderr so they never mix with data printed on stdout.
  console.error(recent)
  console.error(past)

  return {
    recent,
    past,
  }
}

/**
 * Extracts the changed file paths that mention `projects/` from diff-stat
 * style output, where each file line looks like ` <path> | <changes>`.
 *
 * The path is taken as everything before the last `|` (or the whole line when
 * there is none) and trimmed, so paths containing spaces are kept intact
 * instead of being cut at the first space.
 *
 * @param output - Raw diff-stat text.
 * @returns The matching paths, in input order; empty when nothing matches.
 */
function listProjectChanges(output: string): Array<string> {
  return output
    .split('\n')
    .filter((line) => line.includes('projects/'))
    .map((line) => {
      const separator = line.lastIndexOf('|')
      const name = separator === -1 ? line : line.slice(0, separator)
      return name.trim()
    })
}

/**
 * Lists the project files changed between the two most recent commits.
 *
 * Runs `git log` to obtain the two latest commit IDs, then `git diff --stat`
 * between them, and parses the result with `listProjectChanges`.
 *
 * @returns A promise resolving to the changed paths; it rejects when either
 *   git command fails or its output cannot be parsed, instead of never
 *   settling.
 */
export function getListFileChange(): Promise<Array<string>> {
  return new Promise((resolve, reject) => {
    // Only the two newest commits are needed, so do not dump the whole history.
    exec('git log --oneline -n 2', (logError, logOutput) => {
      if (logError) {
        reject(logError)
        return
      }

      let range: { recent: string; past: string }
      try {
        range = getIDsMergeAction(logOutput)
      } catch (parseError) {
        // Throwing inside an exec callback would escape the promise entirely.
        reject(parseError)
        return
      }

      exec(
        // A wide --stat keeps long file names from being abbreviated with
        // "..." when the output is not a terminal.
        `git diff --stat=1000 ${range.past} ${range.recent}`,
        (diffError, diffOutput) => {
          if (diffError) {
            reject(diffError)
            return
          }

          try {
            resolve(listProjectChanges(diffOutput))
          } catch (parseError) {
            reject(parseError)
          }
        },
      )
    })
  })
}
59 changes: 59 additions & 0 deletions apps/server/src/crawl/handleCase.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
import 'dotenv/config'
import { DataReturn } from './classificationCase'
import { creatorParentCategory } from '@/creator/creatorParentCategory'
import { creatorSubCategory } from '@/creator/creatorSubCategory'
import { fsWrapper } from '@/utils/fs/fsWrapper'
import { deleteProject } from './utils/deleteProject'
import { creatorProject } from '@/creator/creatorProject'
import { connection } from '@/databases/connection'
import { Categories } from '@/databases/entities/Categories'
import { Projects } from '@/databases/entities/Projects'
import { ProjectJSON } from '@/shared/schema/ProjectJSON'

/**
 * Returns the second-to-last `/`-separated segment of a project folder path,
 * i.e. the name of the folder that directly contains the project.
 */
function getCategoryName(projectFolder: string): string {
  // After reversing, index 1 is the segment just before the last one.
  const segmentsFromEnd = projectFolder.split('/').reverse()
  return segmentsFromEnd[1]
}

/**
 * Looks up the `Categories` row whose `name` equals the category name derived
 * from the given project folder path.
 */
async function getCategory(projectFolder: string) {
  const repository = connection.getRepository(Categories)
  const criteria = { name: getCategoryName(projectFolder) }

  return repository.findOneBy(criteria)
}

/**
 * Applies a classified change set: creates the listed parent categories, then
 * the sub categories, then (re)creates every listed project.
 *
 * Each stage is awaited before the next begins, and the returned promise
 * settles only once all work has finished, so failures reach the caller
 * instead of becoming unhandled rejections.
 *
 * NOTE(review): running parents before sub categories before projects assumes
 * later stages depend on rows created by earlier ones — confirm.
 *
 * @param caseData - Paths to create, grouped by level.
 */
export async function handleCase(caseData: DataReturn) {
  await Promise.all(
    caseData.parentCategory.map((parent) =>
      creatorParentCategory(`/${parent}`),
    ),
  )

  await Promise.all(
    caseData.subCategory.map((subPath) => creatorSubCategory(`/${subPath}`)),
  )

  await Promise.all(
    caseData.project.map(async (projectPath) => {
      const detailRaw = await fsWrapper.readFile(`/${projectPath}/info.json`)
      const detail: ProjectJSON = JSON.parse(detailRaw)

      // Replace an existing project of the same name rather than duplicating it.
      const project = await connection
        .getRepository(Projects)
        .findOneBy({ name: detail.display_term })

      if (project) {
        await deleteProject(project)
      }

      const category = await getCategory(projectPath)

      await creatorProject(`/${projectPath}`, category)
    }),
  )
}
45 changes: 45 additions & 0 deletions apps/server/src/crawl/utils/deleteProject.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
import { connection } from '@/databases/connection'
import { Projects } from '@/databases/entities/Projects'

/**
 * Deletes a project together with the rows that reference it.
 *
 * Rows in the dependent tables are removed first (in parallel); the row in
 * `projects` is removed only after all of those deletes have completed.
 *
 * The returned promise resolves once every statement has finished, so callers
 * can `await` the deletion. Errors from any statement — including the final
 * delete — are logged and swallowed, preserving the existing best-effort
 * behavior.
 *
 * @param project - The project to delete; only `project.id` is used.
 */
export async function deleteProject(project: Projects): Promise<void> {
  // Tables holding a `project_id` reference to the project being removed.
  const dependentTables = [
    'project_socials',
    'partnerships',
    'project_tags',
    'project_features',
    'glossary_projects',
  ]

  try {
    await Promise.all(
      dependentTables.map((table) =>
        connection.manager.query(`
    DELETE FROM ${table}
    WHERE ${table}.project_id = ${project.id}
  `),
      ),
    )

    await connection.manager.query(`
    DELETE FROM projects
    WHERE projects.id = ${project.id}
  `)
  } catch (error: unknown) {
    console.log(error)
  }
}
59 changes: 59 additions & 0 deletions apps/server/src/crawl/utils/filterNewParent.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
// layer 1
import { connection } from '@/databases/connection'
import { DataReturn } from '../classificationCase'

/**
 * Returns the second `/`-separated segment of a path
 * (for `projects/<parent>/...` that is `<parent>`).
 */
function getParentName(path: string): string {
  const [, parentSegment] = path.split('/')
  return parentSegment
}

/**
 * Collects the distinct parent names of the given paths, in order of first
 * appearance.
 */
function getListParentName(fileChange: Array<string>): Array<string> {
  const uniqueParents = new Set<string>(
    fileChange.map((file) => getParentName(file)),
  )

  return Array.from(uniqueParents)
}

/**
 * Layer 1 of the classification: splits the changed files by whether their
 * parent category already exists in the `category` table.
 *
 * Parent names not found in the table are stored on
 * `dataReturn.parentCategory` as `projects/<name>`. Files whose parent does
 * exist are returned for the next layer.
 *
 * @param dataReturn - Accumulator; its `parentCategory` field is overwritten.
 * @param fileChange - Paths of the changed files.
 * @returns The subset of `fileChange` whose parent category already exists.
 */
export async function filterNewParent(
  dataReturn: DataReturn,
  fileChange: Array<string>,
): Promise<Array<string>> {
  // Paths without a usable parent segment cannot be classified; skip them.
  const listName = getListParentName(fileChange).filter(
    (name): name is string => typeof name === 'string' && name !== '',
  )

  let rows: Array<{ pathName: string }> = []
  // Skip the lookup for an empty list: `IN ()` is not valid SQL.
  if (listName.length > 0) {
    // Bind the names as parameters instead of splicing them into the SQL text;
    // they originate from file paths and must not be trusted.
    // NOTE(review): `?` placeholders assume a MySQL/SQLite-style driver — confirm.
    const placeholders = listName.map(() => '?').join(',')
    const query = `
  SELECT category.pathName
  FROM category
  WHERE category.parent IS NULL AND category.pathName IN (${placeholders});
  `
    rows = await connection.manager.query(query, listName)
  }

  const existingParents = new Set(rows.map((row) => row.pathName))

  // Now we have the new parents; record them on dataReturn.
  dataReturn.parentCategory = listName
    .filter((name) => !existingParents.has(name))
    .map((name) => `projects/${name}`)

  // Compare the parent segment exactly; a substring test would also match
  // unrelated paths that merely contain an existing parent's name.
  return fileChange.filter((file) => existingParents.has(getParentName(file)))
}
Loading

0 comments on commit a2d6974

Please sign in to comment.