From c81b0be65a57289e91e1922970ef469840e1988c Mon Sep 17 00:00:00 2001 From: Nathan Weinberg Date: Fri, 26 Apr 2024 13:04:57 -0500 Subject: [PATCH 1/2] ci: add markdown linter Signed-off-by: Nathan Weinberg Signed-off-by: BJ Hargrave --- .github/workflows/docs.yml | 46 ++++++++++++++++++++++++++++++++++++++ .markdownlint-cli2.yaml | 16 +++++++++++++ Makefile | 22 ++++++++++++++++++ 3 files changed, 84 insertions(+) create mode 100644 .github/workflows/docs.yml create mode 100644 .markdownlint-cli2.yaml create mode 100644 Makefile diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml new file mode 100644 index 0000000..87a17b3 --- /dev/null +++ b/.github/workflows/docs.yml @@ -0,0 +1,46 @@ +# SPDX-License-Identifier: Apache-2.0 + +name: Lint Markdown documents + +on: + push: + branches: + - "main" + paths: + - '**/*.md' + - '.markdownlint-cli2.yaml' + - '.github/workflows/docs.yml' # This workflow + pull_request: + branches: + - "main" + paths: + - '**/*.md' + - '.markdownlint-cli2.yaml' + - '.github/workflows/docs.yml' # This workflow + +env: + LC_ALL: en_US.UTF-8 + +defaults: + run: + shell: bash + +permissions: + contents: read + +jobs: + markdown-lint: + runs-on: ubuntu-latest + steps: + - name: "Harden Runner" + uses: step-security/harden-runner@a4aa98b93cab29d9b1101a6143fb8bce00e2eac4 # v2.7.1 + with: + egress-policy: audit # TODO: change to 'egress-policy: block' after couple of runs + - name: "Checkout" + uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4.1.4 + with: + fetch-depth: 0 + - name: "Check Markdown documents" + uses: DavidAnson/markdownlint-cli2-action@b4c9feab76d8025d1e83c653fa3990936df0e6c8 # v16.0.0 + with: + globs: '**/*.md' diff --git a/.markdownlint-cli2.yaml b/.markdownlint-cli2.yaml new file mode 100644 index 0000000..59f2fa6 --- /dev/null +++ b/.markdownlint-cli2.yaml @@ -0,0 +1,16 @@ +# SPDX-License-Identifier: Apache-2.0 + +config: + line-length: false + no-emphasis-as-header: false + first-line-heading: 
false + code-block-style: false + no-duplicate-header: false + single-trailing-newline: false +globs: + - "**/*.md" +ignores: + - ".github/**" + - ".tox/**" + - "venv/**" + - ".venv/**" diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..c33811f --- /dev/null +++ b/Makefile @@ -0,0 +1,22 @@ +.PHONY: help +help: + @awk 'BEGIN {FS = ":.*##"; printf "\nUsage:\n make \033[36m\033[0m\n"} /^[a-zA-Z_0-9-]+:.*?##/ { printf " \033[36m%-18s\033[0m %s\n", $$1, $$2 } /^##@/ { printf "\n\033[1m%s\033[0m\n", substr($$0, 5) } ' $(MAKEFILE_LIST) + +# +# If you want to see the full commands, run: +# NOISY_BUILD=y make +# +ifeq ($(NOISY_BUILD),) + ECHO_PREFIX=@ + CMD_PREFIX=@ + PIPE_DEV_NULL=> /dev/null 2> /dev/null +else + ECHO_PREFIX=@\# + CMD_PREFIX= + PIPE_DEV_NULL= +endif + +.PHONY: md-lint +md-lint: ## Lint markdown files + $(ECHO_PREFIX) printf " %-12s ./...\n" "[MD LINT]" + $(CMD_PREFIX) podman run --rm -v $(CURDIR):/workdir --security-opt label=disable docker.io/davidanson/markdownlint-cli2:latest > /dev/null From d6b999909df8009808607b7a92421e0368721621 Mon Sep 17 00:00:00 2001 From: BJ Hargrave Date: Thu, 2 May 2024 13:53:03 -0400 Subject: [PATCH 2/2] docs: Fixes for markdown linting problems Signed-off-by: BJ Hargrave --- COCC.md | 6 +- CODE_OF_CONDUCT.md | 4 +- CONTRIBUTING.md | 36 +++++----- CONTRIBUTOR_ROLES.md | 110 ++++++++++++++--------------- Collaboration.md | 84 +++++++++++----------- FAQ.md | 95 ++++++++++++------------- InstructLabSlackGuide.md | 37 +++++----- InstructLabSlackModerationGuide.md | 24 ++++--- MAINTAINERS.md | 12 ---- QUICK_START_GUIDE.md | 23 +++--- README.md | 66 ++++++++--------- docs/DataSources.md | 33 +++++---- docs/README.md | 19 +++-- governance.md | 18 ++--- tools/maintainers/README.md | 2 +- 15 files changed, 284 insertions(+), 285 deletions(-) diff --git a/COCC.md b/COCC.md index 3cd9430..08a1af8 100644 --- a/COCC.md +++ b/COCC.md @@ -1,14 +1,14 @@ # Filing a Code of Conduct Violation Report -We take reports of 
violations to our project [Code of Conduct](https://github.com/instructlab/community/blob/main/CODE_OF_CONDUCT.md) with the utmost seriousness and will act upon them as quickly as possible. +We take reports of violations to our project [Code of Conduct](https://github.com/instructlab/community/blob/main/CODE_OF_CONDUCT.md) with the utmost seriousness and will act upon them as quickly as possible. To report a Code of Conduct violation to our Code of Conduct Committee, you may reach out to us by email at [coc@instructlab.ai](mailto:coc@instructlab.ai). The email will be read and acted upon by our Code of Conduct Committee members. If you experience a Code of Conduct violation in our InstructLab Slack workspace, please follow the [instructions in our moderation guide](https://github.com/instructlab/community/blob/main/InstructLabSlackModerationGuide.md#reporting-abuse) to get immediate help in Slack. -As part of our follow up on your report, we would like to contact you for further discussion. If you would prefer not to engage beyond reporting the matter to the committee, please let us know that as part of your submission. We will respect your request. +As part of our follow up on your report, we would like to contact you for further discussion. If you would prefer not to engage beyond reporting the matter to the committee, please let us know that as part of your submission. We will respect your request. -# Code of Conduct Committee Members +## Code of Conduct Committee Members The current members of the CoCC are: diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md index 533a311..701f2fe 100644 --- a/CODE_OF_CONDUCT.md +++ b/CODE_OF_CONDUCT.md @@ -69,9 +69,9 @@ members of the project's leadership. 
## Attribution This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, -available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html +available at [homepage]: https://www.contributor-covenant.org For answers to common questions about this code of conduct, see -https://www.contributor-covenant.org/faq + diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index b07161a..5c1f8d7 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -50,21 +50,21 @@ We welcome contributions in the form of pull requests for documentation. To subm The InstructLab project uses the _Fork and Pull_ model for contribution that is common in many open source repositories; this entails multiple steps, including forking and cloning the repository, creating a _pull request_, or PR, and more. For details on this process, check out [The GitHub Workflow Guide](https://github.com/kubernetes/community/blob/master/contributors/guide/github-workflow.md) from Kubernetes. -After you have forked and cloned the repository, you can start the contribution process by looking at the issue trackers of the [community repository](https://github.com/instructlab/community/pulls), [CLI repository](https://github.com/instructlab/instructlab/issues), or the [taxonomy repository](https://github.com/instructlab/taxonomy/issues). You can then pick up an issue by leaving a comment on said issue, and address the issue in a pull request (PR). Prior to submission, make sure that your changes pass formatting, linting, and unit tests. Additionally, all PRs must agree to the terms of [Developer Certificate of Origin (DCO)](https://developercertificate.org/) by signing off your commits. Only PRs with commits signed off are accepted. If you didn't sign off your commits before creating the pull request, no worries, you can fix that after the fact. For more information about this process, see [Developer Certificate of Origin (DCO)](#DCO). 
+After you have forked and cloned the repository, you can start the contribution process by looking at the issue trackers of the [community repository](https://github.com/instructlab/community/pulls), [CLI repository](https://github.com/instructlab/instructlab/issues), or the [taxonomy repository](https://github.com/instructlab/taxonomy/issues). You can then pick up an issue by leaving a comment on said issue, and address the issue in a pull request (PR). Prior to submission, make sure that your changes pass formatting, linting, and unit tests. Additionally, all PRs must agree to the terms of [Developer Certificate of Origin (DCO)](https://developercertificate.org/) by signing off your commits. Only PRs with commits signed off are accepted. If you didn't sign off your commits before creating the pull request, no worries, you can fix that after the fact. For more information about this process, see [Developer Certificate of Origin (DCO)](#developer-certificate-of-origin-dco). Then, you can submit the PR to be reviewed. In general, we follow the standard [GitHub pull request](https://help.github.com/en/articles/about-pull-requests) process. Follow the provided template on your PR to include details about your pull request for the maintainers. -> [!IMPORTANT] +> [!IMPORTANT] > If you are seeking to make a larger contribution, such as introducing a new feature or functionality, or refactoring a significant portion of the codebase to improve performance, readability, or maintainability, [get in touch](#communication) with us prior to starting. This helps ensure that your time is not wasted working on a change that the project developers will not accept into the codebase. ### Pull request review Once you've created a pull request (PR), maintainers will review your code and may make suggestions to fix before merging. It will be easier for your PR to receive reviews if you consider the criteria the reviewers follow while working. 
Remember to: -- Run tests locally and ensure that they pass -- Follow the project coding conventions -- Write detailed commit messages -- Break large changes into a logical series of smaller patches, which are easy to understand individually and combine to solve a broader issue +* Run tests locally and ensure that they pass +* Follow the project coding conventions +* Write detailed commit messages +* Break large changes into a logical series of smaller patches, which are easy to understand individually and combine to solve a broader issue For a list of the maintainers and triagers, see the [MAINTAINERS.md](MAINTAINERS.md) page. @@ -72,8 +72,8 @@ For a list of the maintainers and triagers, see the [MAINTAINERS.md](MAINTAINERS To propose a new feature, it's best to raise an issue in the appropriate repository: -- [Instructlab CLI repository](https://github.com/instructlab/instructlab/issues) -- [Taxonomy repository](https://github.com/instructlab/taxonomy/issues) +* [Instructlab CLI repository](https://github.com/instructlab/instructlab/issues) +* [Taxonomy repository](https://github.com/instructlab/taxonomy/issues) This way, features can be discussed with the project maintainers, ensuring that your time is not wasted working on a feature that the project developers will not accept into the codebase. @@ -108,20 +108,20 @@ git commit -s The following example includes a `Signed-off-by:` line, which indicates that the submitter has accepted the DCO: -``` +```text Signed-off-by: John Doe ``` We automatically verify that all commit messages contain a `Signed-off-by:` line with your email address. -#### Useful tools for doing DCO signoffs +#### Useful tools for doing DCO signoffs There are a number of tools that make it easier for developers to manage DCO signoffs. 
-- DCO command line tool, which let's you do a single signoff for an entire repo ( ) -- GitHub UI integrations for adding the signoff automatically ( ) -- Chrome - -- Firefox - +* DCO command line tool, which lets you do a single signoff for an entire repo ( ) +* GitHub UI integrations for adding the signoff automatically ( ) +* Chrome - +* Firefox - ## Communication @@ -133,12 +133,12 @@ The following resources include additional information about each repository, su ### ilab CLI tool additional resources -- [`ilab` CLI tool README.md](https://github.com/instructlab/instructlab/blob/main/README.md#). This resources provides information about the `ilab` CLI tool, including an overview, getting started, training the model, submitting a pull request, etc. +* [`ilab` CLI tool README.md](https://github.com/instructlab/instructlab/blob/main/README.md#). This resource provides information about the `ilab` CLI tool, including an overview, getting started, training the model, submitting a pull request, etc. -- [`ilab` CLI tool CONTRIBUTING.md](https://github.com/instructlab/instructlab/blob/main/CONTRIBUTING/CONTRIBUTING.md). This resources provides information about contributing to the `ilab` CLI tool repository, reporting bugs, testing, coding styles, etc. +* [`ilab` CLI tool CONTRIBUTING.md](https://github.com/instructlab/instructlab/blob/main/CONTRIBUTING/CONTRIBUTING.md). This resource provides information about contributing to the `ilab` CLI tool repository, reporting bugs, testing, coding styles, etc. ### Taxonomy additional resources -- [Taxonomy README.md](https://github.com/instructlab/taxonomy/blob/main/README.md). +* [Taxonomy README.md](https://github.com/instructlab/taxonomy/blob/main/README.md). 
This resource provides information about the taxonomy repository, including getting started, YAML examples for skills and knowledge pull requests, how to contribute, etc. -- [Taxonomy CONTRIBUTING.md](https://github.com/instructlab/taxonomy/blob/main/CONTRIBUTING.md). This resource contains information and best practices for contributing to the taxonomy repository. +* [Taxonomy CONTRIBUTING.md](https://github.com/instructlab/taxonomy/blob/main/CONTRIBUTING.md). This resource contains information and best practices for contributing to the taxonomy repository. diff --git a/CONTRIBUTOR_ROLES.md b/CONTRIBUTOR_ROLES.md index 4a1f08f..710dd95 100644 --- a/CONTRIBUTOR_ROLES.md +++ b/CONTRIBUTOR_ROLES.md @@ -2,8 +2,8 @@ InstructLab is made up of several projects that are defined as codebases and services with different release cycles. Collectively, these enable large-model development. Currently, these projects include the following: -* [`ilab` command-line interface (CLI) tool](https://github.com/instructlab/instructlab). This repository is responsible for the the `ilab` CLI tool. -* [taxonomy tree](https://github.com/instructlab/taxonomy). This repository is responsible for the taxonomy tree that allows you to create models tuned with your data. +* [`ilab` command-line interface (CLI) tool](https://github.com/instructlab/instructlab). This repository is responsible for the `ilab` CLI tool. +* [taxonomy tree](https://github.com/instructlab/taxonomy). This repository is responsible for the taxonomy tree that allows you to create models tuned with your data. This document outlines a core number of contributor roles for InstructLab projects, such as _Member_, _Triager_, _Reviewer_, and _Maintainer_. In the future, an _Oversight Committee_ will be formed, which will serve to supervise the overall InstructLab project and its health. Using transparent criteria, the journey between roles is based on individual participation. 
@@ -33,28 +33,28 @@ Members are active contributors in the community. They can have issues and pull To become a project Member, you must meet the following requirements: -- You have made multiple contributions to the project or community. Contributions may include, but are not limited to: +* You have made multiple contributions to the project or community. Contributions may include, but are not limited to: - - Authoring or reviewing PRs on GitHub. - - Filing or commenting on issues on GitHub. - - Contributing to community discussion, for example, meetings or on Slack. + * Authoring or reviewing PRs on GitHub. + * Filing or commenting on issues on GitHub. + * Contributing to community discussion, for example, meetings or on Slack. -- You have been sponsored by two Maintainers. +* You have been sponsored by two Maintainers. If you have met these expectations and wish to become an established member, you can be nominated by a contributor, or you can nominate yourself. To nominate a contributor or yourself: -- Open an issue in the repository of interest detailing contributions to the project so far. -- Ensure that the sponsors are `@mentioned` on the issue. -- Make sure that the list of contributions included is representative of the work on the project. +* Open an issue in the repository of interest detailing contributions to the project so far. +* Ensure that the sponsors are `@mentioned` on the issue. +* Make sure that the list of contributions included is representative of the work on the project. #### Member responsibilities and privileges -As a project Member, you have the following responsibilities and privileges: +As a project Member, you have the following responsibilities and privileges: -- You are responsive to issues and the pull requests assigned to them. -- You are an active owner of code that you have contributed, unless ownership is explicitly transferred: - - You provide code that consistently pass tests. 
- - You consistently address bugs or issues that are discovered after code has been accepted. +* You are responsive to issues and the pull requests assigned to them. +* You are an active owner of code that you have contributed, unless ownership is explicitly transferred: + * You provide code that consistently pass tests. + * You consistently address bugs or issues that are discovered after code has been accepted. **Note:** Members who frequently contribute code are expected to proactively perform code reviews and work towards becoming a Reviewer for the project that they are active in. @@ -69,51 +69,51 @@ Triagers are active contributors in the community through issue and pull request To become a project Triager, you must meet the following requirements: -- You have made multiple contributions to the project or community. Contribution may include, but is not limited to: - - Triaging open issues or PRs. - - Authoring or reviewing PRs on GitHub. - - Contributing to community discussions (e.g. meetings, Slack). +* You have made multiple contributions to the project or community. Contribution may include, but is not limited to: + * Triaging open issues or PRs. + * Authoring or reviewing PRs on GitHub. + * Contributing to community discussions (e.g. meetings, Slack). -- You have been sponsored by two Maintainers or Reviewers. +* You have been sponsored by two Maintainers or Reviewers. Any person who meets the requirements may be nominated by a contributor, including themselves. To nominate a contributor or yourself: -- Open an issue in the repository of interest detailing contributions to the project so far. -- Ensure that the sponsors are `@mentioned` on the issue. -- Make sure that the list of contributions included is representative of the work on the project. +* Open an issue in the repository of interest detailing contributions to the project so far. +* Ensure that the sponsors are `@mentioned` on the issue. 
+* Make sure that the list of contributions included is representative of the work on the project. #### Triager responsibilities and privileges -As a project Triager, you have the following responsibilities and privileges: +As a project Triager, you have the following responsibilities and privileges: -- You have permission to label issues and PRs. -- You consistently assign, close, and reopen issues or PRs. -- You actively triage issues and PRs with high quality. +* You have permission to label issues and PRs. +* You consistently assign, close, and reopen issues or PRs. +* You actively triage issues and PRs with high quality. ### Maintainer Maintainers are first and foremost contributors that have shown they are committed to the long term success of a project. Maintainership is about building trust with the community and being a person that everyone can depend on to make consistent decisions in the best interest of the project. -**Defined by:** *Maintainers* entry in the [MAINTAINER file](https://github.com/instructlab/community/blob/main/MAINTAINERS.md). +**Defined by:** _Maintainers_ entry in the [MAINTAINER file](https://github.com/instructlab/community/blob/main/MAINTAINERS.md). #### Maintainer requirements To become a project Maintainer, you must meet the following requirements: -- You have been a Member for at least 1 month. -- You have a deep understanding of the technical goals and direction of the project. -- You have a deep understanding of the technical domain of the project. -- You have made sustained contributions to design and direction by: - - Authoring and reviewing proposals. - - Initiating, contributing, and resolving discussions, such as emails, Slack, GitHub issues, meetings. - - Identifying subtle or complex issues in designs and implementation pull requests. -- You have directly contributed to the project through implementation and/or review. -- You have been sponsored by two Maintainers. +* You have been a Member for at least 1 month. 
+* You have a deep understanding of the technical goals and direction of the project. +* You have a deep understanding of the technical domain of the project. +* You have made sustained contributions to design and direction by: + * Authoring and reviewing proposals. + * Initiating, contributing, and resolving discussions, such as emails, Slack, GitHub issues, meetings. + * Identifying subtle or complex issues in designs and implementation pull requests. +* You have directly contributed to the project through implementation and/or review. +* You have been sponsored by two Maintainers. If you meet the requirements, nominate yourself to become a Maintainer by sending an email to the Maintainers with your candidacy. You must: -- Ensure that your sponsors are `@mentioned` on the email. -- Include a list of contributions representative of your work on the project. +* Ensure that your sponsors are `@mentioned` on the email. +* Include a list of contributions representative of your work on the project. Maintainers will vote privately and respond to the issue with either acceptance or with feedback for suggested improvement. Feedback may be given privately. @@ -121,22 +121,22 @@ After a [decision has been made](https://github.com/instructlab/community/blob/m #### Maintainer responsibilities and Privileges -As a project Maintainer, you have the following responsibilities and privileges: - -- You make and approve technical design decisions. -- You set technical direction and priorities. -- You define milestones and releases. -- You mentor and guide contributors to the project. -- You ensure the continued health of the project. -- You are responsive to review requests. -- You review assigned PRs that are related to your area of expertise. -- You focus on quality and correctness, including testing code and factoring content. -- You are responsible for project quality control via code reviews. -- You perform adequate test coverage to confidently release. 
-- The tests that you perform are passing reliably (i.e. not flaky) and are fixed when they fail. -- You ensure that a healthy process for discussion and decision making is in place. -- You work with other Maintainers to maintain the project's overall health and success holistically. -- Unless otherwise specified, you will be provided with permission to merge commits to the project repository branches. +As a project Maintainer, you have the following responsibilities and privileges: + +* You make and approve technical design decisions. +* You set technical direction and priorities. +* You define milestones and releases. +* You mentor and guide contributors to the project. +* You ensure the continued health of the project. +* You are responsive to review requests. +* You review assigned PRs that are related to your area of expertise. +* You focus on quality and correctness, including testing code and factoring content. +* You are responsible for project quality control via code reviews. +* You perform adequate test coverage to confidently release. +* The tests that you perform are passing reliably (i.e. not flaky) and are fixed when they fail. +* You ensure that a healthy process for discussion and decision making is in place. +* You work with other Maintainers to maintain the project's overall health and success holistically. +* Unless otherwise specified, you will be provided with permission to merge commits to the project repository branches. ## Stepping Down and the Emeritus Process diff --git a/Collaboration.md b/Collaboration.md index b87964d..2c952ea 100644 --- a/Collaboration.md +++ b/Collaboration.md @@ -1,49 +1,52 @@ # InstructLab Community Collaboration Spaces -We have a number of ways for folks to learn more about InstructLab, communicate with the project maintainers and their fellow users, or subscribe for project updates. 
+We have a number of ways for folks to learn more about InstructLab, communicate with the project maintainers and their fellow users, or subscribe for project updates. ## Getting Started with InstructLab -If you need help getting started with using or contributing to InstructLab, the best way to do so is via our [project Slack workspace](#chat) or [email lists](#email-lists) rather than posts on social media. +If you need help getting started with using or contributing to InstructLab, the best way to do so is via our [project Slack workspace](#chat) or [email lists](#email-lists) rather than posts on social media. -## [Project Meetings](#Project-Meetings) +## [Project Meetings](#project-meetings) -To stay up to date on when meetings are added and how to join them, subscribe to the [InstructLab project calendar](https://calendar.google.com/calendar/embed?src=c_23c2f092cd6d147c45a9d2b79f815232d6c3e550b56c3b49da24c4b5d2090e8f%40group.calendar.google.com). +To stay up to date on when meetings are added and how to join them, subscribe to the [InstructLab project calendar](https://calendar.google.com/calendar/embed?src=c_23c2f092cd6d147c45a9d2b79f815232d6c3e550b56c3b49da24c4b5d2090e8f%40group.calendar.google.com). The project will host more meetings as it evolves, but those we have already set up are listed here. -### [Weekly All Community Meeting](#all-community-meeting) -We host weekly community meetings each Tuesday at 14:00 UTC. ([time zone converter](https://www.timeanddate.com/worldclock/meetingdetails.html?year=2024&month=5&day=14&hour=14&min=0&sec=0&p1=37&p2=43&p3=101&p4=224&p5=213&p6=771&p7=248&p8=2)) +### [Weekly All Community Meeting](#weekly-all-community-meeting) -Weekly community meetings will feature regular updates on project happenings, including announcements, demos, places to meet with the project maintainers, and more. The second half of the hour will be dedicated to community organization and maintenance. 
+We host weekly community meetings each Tuesday at 14:00 UTC. ([time zone converter](https://www.timeanddate.com/worldclock/meetingdetails.html?year=2024&month=5&day=14&hour=14&min=0&sec=0&p1=37&p2=43&p3=101&p4=224&p5=213&p6=771&p7=248&p8=2)) + +Weekly community meetings will feature regular updates on project happenings, including announcements, demos, places to meet with the project maintainers, and more. The second half of the hour will be dedicated to community organization and maintenance. If you are new to the InstructLab project, the weekly community meeting is a great place to get started! -### [Office Hours](#Office-Hours) +### [Office Hours](#office-hours) + We have two dedicated Office Hours slots each Thursday so we're able to meet with folks across different time zones. See the InstructLab project calendar to select which time works best for you. -### [Triager Standup](#Triager-Standup) -We host daily Triage Team stand up meetings at 18:30 UTC. ([time zone converter](https://www.timeanddate.com/worldclock/meetingdetails.html?year=2024&month=5&day=29&hour=18&min=30&sec=0&p1=37&p2=43&p3=224&p4=213&p5=771&p6=248&p7=2&p8=101&iv=1800)). In this meeting, [triagers](https://github.com/instructlab/community/blob/main/CONTRIBUTOR_ROLES.md#triager) speak and discuss possible issues or successes with the different PRs put into the https://github.com/instructlab/taxonomy repo. +### [Triager Standup](#triager-standup) + +We host daily Triage Team stand up meetings at 18:30 UTC. ([time zone converter](https://www.timeanddate.com/worldclock/meetingdetails.html?year=2024&month=5&day=29&hour=18&min=30&sec=0&p1=37&p2=43&p3=224&p4=213&p5=771&p6=248&p7=2&p8=101&iv=1800)). In this meeting, [triagers](https://github.com/instructlab/community/blob/main/CONTRIBUTOR_ROLES.md#triager) speak and discuss possible issues or successes with the different PRs put into the repo. If you have questions or ideas, we have an open door policy and would love for you to join us. 
-## [Chat](#Chat) +## [Chat](#chat) For real-time chat discussions, please join our [InstructLab Slack workspace](https://github.com/instructlab/community/blob/main/InstructLabSlackGuide.md). -Slack history is deleted after 90 days, so for conversations that should preserved for a longer period use the [project mailing lists](#email-lists). +Slack history is deleted after 90 days, so for conversations that should be preserved for a longer period use the [project mailing lists](#email-lists). If you want to add feedback or think there is a "large issue" to discuss, a mailing list or a specific repository issue tracker is a good place to have the conversation rather than Slack. If you are unsure of where to comment, [users@instructlab.ai](https://groups.google.com/a/instructlab.ai/g/users) is the best place to start. -## [Email Lists](#Email-Lists) +## [Email Lists](#email-lists) We use the following email lists for project communications. Subscriptions requires a [Google account](https://www.google.com/account/about/). To join a list, click the list name in the table below to visit the list subscription page. If an entry below is noted as an email alias or private to a specific group, you will not be able to join. 
-### [Aliases and Mailing Lists Catalog](#List-Catalog) +### [Aliases and Mailing Lists Catalog](#aliases-and-mailing-lists-catalog) -Name | Topic(s) +Name | Topic(s) -- | -- [announce](https://groups.google.com/a/instructlab.ai/g/announce) | announcements only list for news about releases and other project wide updates; very low traffic [coc](mailto:coc@instructlab.ai) | email alias for the [Code of Conduct Committee](https://github.com/instructlab/community/blob/main/COCC.md), used to report violations privately to the committee @@ -51,53 +54,50 @@ Name | Topic(s) [dev](https://groups.google.com/a/instructlab.ai/g/dev) | developer discussions, both front and backend [maintainers](https://groups.google.com/a/instructlab.ai/g/maintainers) | maintainer only discussions that must be private, to be used very sparingly [PyPI](https://groups.google.com/a/instructlab.ai/g/pypi) | maintainers only list used for PyPI packaging -[security](https://groups.google.com/a/instructlab.ai/g/security) | security disclosure notifications +[security](https://groups.google.com/a/instructlab.ai/g/security) | security disclosure notifications [security-reporting](mailto:security-reporting@instructlab.ai) | email alias to report a possible security vulnerability to project security team [users](https://groups.google.com/a/instructlab.ai/g/users) | user feedback on project or help getting started, e.g. "my first PR" -### [Who Can Post to and Read Each Mailing List](#List-Permissions) +### [Who Can Post to and Read Each Mailing List](#who-can-post-to-and-read-each-mailing-list) -This table describes who can join each email list, view posts, etc. Some lists, such as the coc and security lists are private so that reporting may be handled with discretion. +This table describes who can join each email list, view posts, etc. Some lists, such as the `coc` and `security` lists are private so that reporting may be handled with discretion. 
+List Name | who can join group | who can view posts | who can post to group | who can post as group +-- | -- | -- | -- | -- +announce | anyone | anyone | admins | admins +community | anyone | anyone | anyone | admins +dev | anyone | anyone | members | noone +maintainers | invite only - project maintainers | members | members | admins +pypi | invite only - project maintainers | members | members | admins +security | anyone | anyone | admins | admins +users | anyone | anyone | members | noone -List Name | who can join group | who can view posts | who can post to group | who can post as group --- | -- | -- | -- | -- -announce | anyone | anyone | admins | admins -community | anyone | anyone | anyone | admins -dev | anyone | anyone | members | noone -maintainers | invite only - project maintainers | members | members | admins -pypi | invite only - project maintainers | members | members | admins -security | anyone | anyone | admins | admins -users | anyone | anyone | members | noone +## [GitHub Discussions](#github-discussions) -## [GitHub Discussions](#GitHub-Discussions) +We are using the GitHub discussion boards in each repo for cases where we need to document things quickly but emphemerally, such as working together as a community to squash a nasty bug. In that case, a link to the appropriate discussion board post will be sent to the relevant project mailing lists so folks can follow along on GitHub. Rather than use the discussion boards to discuss proposals for enhancements or to request help with using InstructLab, please reach out on the project [email lists](#email-lists) or [Slack](#chat). -We are using the GitHub discussion boards in each repo for cases where we need to document things quickly but emphemerally, such as working together as a community to squash a nasty bug. In that case, a link to the appropriate discussion board post will be sent to the relevant project mailing lists so folks can follow along on GitHub. 
Rather than use the discussion boards to discuss proposals for enhancements or to request help with using InstructLab, please reach out on the project [email lists](#email-lists) or [Slack](#Chat). +## [Hugging Face](#hugging-face) -## [Hugging Face](#Hugging-Face) +We regularly post model builds on the project's [Hugging Face page](https://huggingface.co/instructlab). -We regularly post model builds on the project's [Hugging Face page](https://huggingface.co/instructlab). +## [Social Media](#social-media) -## [Social Media](#Social-Media) +The InstructLab project community maintainers will post regular updates on these social media services. Our project hashtag is `#InstructLab`. -The InstructLab project community maintainers will post regular updates on these social media services. Our project hashtag is #InstructLab. - -### [LinkedIn](#LinkedIn) +### [LinkedIn](#linkedin) Follow our [LinkedIn page](https://www.linkedin.com/company/instructlab) for updates. -### [X (Twitter)](#X) +### [X (Twitter)](#x-twitter) Follow us on [X](https://twitter.com/instructlab) for our latest Tweets. Er, Xes. Er, posts of some sort. -### [YouTube](#YouTube) +### [YouTube](#youtube) -Subscribe to the [InstructLab YouTube channel](https://www.youtube.com/@InstructLab) for regular updates when we post video tutorials or playlists of talks about InstructLab from conferences and meetups. +Subscribe to the [InstructLab YouTube channel](https://www.youtube.com/@InstructLab) for regular updates when we post video tutorials or playlists of talks about InstructLab from conferences and meetups. ## [Submitting content](#submitting-content) -Have you made a video tutorial, how to document, or other content that would be helpful to folks involved in the InstructLab community? Thank you! - -We would love to help you share it. 
Please [file an issue in the Community Repo](https://github.com/instructlab/community/issues) or send a note to the [community email](https://groups.google.com/a/instructlab.ai/g/community) list to let us know about what you have created. - +Have you made a video tutorial, how to document, or other content that would be helpful to folks involved in the InstructLab community? Thank you! +We would love to help you share it. Please [file an issue in the Community Repo](https://github.com/instructlab/community/issues) or send a note to the [community email](https://groups.google.com/a/instructlab.ai/g/community) list to let us know about what you have created. diff --git a/FAQ.md b/FAQ.md index da1128c..e6f0aec 100644 --- a/FAQ.md +++ b/FAQ.md @@ -2,7 +2,7 @@ Last updated: April 2024 -# Table of Contents +## Table of Contents - [Document summary](#document-summary) - [General FAQ](#general-faq) @@ -37,18 +37,18 @@ Last updated: April 2024 This page serves as a comprehensive FAQ for the InstructLab project, detailing how it works, how to begin contribution, and the goals behind the project. Key information includes: -* **InstructLab Overview**: This open source project allows users to interact with and train the Merlinite-7b AI Large Language Model (LLM) by contributing skills and knowledge. -* **LAB Method**: A synthetic data-based tuning method for LLMs consisting of a taxonomy-driven data curation process, a synthetic data generator, and two-phased training with replay buffers. -* **Contribution Process**: Contributors can add skills or knowledge to the LLM by creating YAML files and testing changes locally before submitting a pull request to InstructLab’s GitHub repository. -* **Project Goals**: To democratize contributions to AI and LLMs, allowing rapid model development through community collaboration facilitated by weekly builds that integrate community contributions. 
+- **InstructLab Overview**: This open source project allows users to interact with and train the Merlinite-7b AI Large Language Model (LLM) by contributing skills and knowledge. +- **LAB Method**: A synthetic data-based tuning method for LLMs consisting of a taxonomy-driven data curation process, a synthetic data generator, and two-phased training with replay buffers. +- **Contribution Process**: Contributors can add skills or knowledge to the LLM by creating YAML files and testing changes locally before submitting a pull request to InstructLab’s GitHub repository. +- **Project Goals**: To democratize contributions to AI and LLMs, allowing rapid model development through community collaboration facilitated by weekly builds that integrate community contributions. ## Documentation disclaimer There are currently three repositories that contain documentation crucial to getting users starting with the project: -* [Community](https://github.com/instructlab/community) This repository shares InstructLab's activity and collaboration details across the community and include the most current information about the project. It should be approached as the primary repository for getting started, and contains procedures and links to relevant information to make the process as simple as possible. -* [`ilab` command-line interface (CLI) tool](https://github.com/instructlab/instructlab). This repository is responsible for the `ilab` CLI tool. It provides information about how to download the `ilab` CLI, how to contribute to the `ilab` CLI tool, among others. -* [Taxonomy Tree](https://github.com/instructlab/taxonomy). This repository is responsible for the taxonomy tree that allows you to create models tuned with your data. It provides information about what skills and knowledge are, how to create a pull request to contribute to the AI model, and expectations for pull request review. 
+- [Community](https://github.com/instructlab/community) This repository shares InstructLab's activity and collaboration details across the community and include the most current information about the project. It should be approached as the primary repository for getting started, and contains procedures and links to relevant information to make the process as simple as possible. +- [`ilab` command-line interface (CLI) tool](https://github.com/instructlab/instructlab). This repository is responsible for the `ilab` CLI tool. It provides information about how to download the `ilab` CLI, how to contribute to the `ilab` CLI tool, among others. +- [Taxonomy Tree](https://github.com/instructlab/taxonomy). This repository is responsible for the taxonomy tree that allows you to create models tuned with your data. It provides information about what skills and knowledge are, how to create a pull request to contribute to the AI model, and expectations for pull request review. As this project grows, documentation and its organization will change. Members of this project will be made aware of significant changes and updates made to documentation. @@ -88,11 +88,11 @@ You can begin your contribution journey by reading over the [Contributing](https When you're ready to start contributing, you can follow the [Getting started](https://github.com/instruct-lab/community/blob/main/README.md#getting-started-with-the-instructlab-project-workstreams) guide. This guide shows you how to -* Install the `ilab` CLI. -* Deploy the LLM locally. -* Add skills or knowledge and train to the local LLM with your data. -* Create a pull request and add your information to the InstructLab taxonomy. -* Get reviews on your pull requests +- Install the `ilab` CLI. +- Deploy the LLM locally. +- Add skills or knowledge and train to the local LLM with your data. +- Create a pull request and add your information to the InstructLab taxonomy. 
+- Get reviews on your pull requests ### I'm having problems with the `ilab` CLI tool. What should I do? @@ -120,20 +120,19 @@ In the context of InstructLab, a [_skill_](https://github.com/instruct-lab/taxon InstructLab skills are broken down into two main categories: -* [**Composition skills.**](https://github.com/instruct-lab/community/blob/main/docs/SKILLS_GUIDE.md#compositional-skills) Composition or _performative_ skills allow AI models to perform specific tasks or functions. With InstructLab, there are two types of composition skills: - * [**Freeform compositional skills**](https://github.com/instruct-lab/community/blob/main/docs/SKILLS_GUIDE.md#freeform-compositional-skills) are performative skills that do not require additional context. For example, to train an AI model to write a poem, you would provide examples of poems. - * [**Grounded compositional skills**](https://github.com/instruct-lab/community/blob/main/docs/SKILLS_GUIDE.md#grounded-compositional-skills) are performative skills that require additional context. One example is how an AI model reads the value of a cell in a table layout. To create the grounded skill to read a table formatted in Markdown, the additional context might be an example table layout. -* **Foundational skills.** Foundational skills are skills like math, reasoning, and coding. +- [**Composition skills.**](https://github.com/instruct-lab/community/blob/main/docs/SKILLS_GUIDE.md#compositional-skills) Composition or _performative_ skills allow AI models to perform specific tasks or functions. With InstructLab, there are two types of composition skills: + - [**Freeform compositional skills**](https://github.com/instruct-lab/community/blob/main/docs/SKILLS_GUIDE.md#freeform-compositional-skills) are performative skills that do not require additional context. For example, to train an AI model to write a poem, you would provide examples of poems. 
+ - [**Grounded compositional skills**](https://github.com/instruct-lab/community/blob/main/docs/SKILLS_GUIDE.md#grounded-compositional-skills) are performative skills that require additional context. One example is how an AI model reads the value of a cell in a table layout. To create the grounded skill to read a table formatted in Markdown, the additional context might be an example table layout. +- **Foundational skills.** Foundational skills are skills like math, reasoning, and coding. **Note**: Foundational skills are not currently being accepted. - -Skills are written in a YAML file and submitted to the InstructLab upstream project for review. See the [Skills: YAML examples](https://github.com/instruct-lab/taxonomy/blob/main/README.md#skills-yaml-examples) for different types of examples. +Skills are written in a YAML file and submitted to the InstructLab upstream project for review. See the [Skills: YAML examples](https://github.com/instruct-lab/taxonomy/blob/main/README.md#skills-yaml-examples) for different types of examples. ### What is “knowledge”? [_Knowledge_](https://github.com/instruct-lab/taxonomy/blob/main/README.md#getting-started-with-knowledge-contributions) consists of data and facts. When creating knowledge for an AI model, you are providing it with additional data and information to answer questions more accurately. Whereas skills are the information that trains an AI model on how to do something, knowledge is based on the AI model’s ability to answer questions that involve facts, data, or references. -Like skills, knowledge submissions are submitted in YAML format to the InstructLab upstream project for review. See the [Knowledge: YAML examples](https://github.com/instruct-lab/taxonomy/blob/main/README.md#knowledge-yaml-examples) for different types of examples. +Like skills, knowledge submissions are submitted in YAML format to the InstructLab upstream project for review. 
See the [Knowledge: YAML examples](https://github.com/instruct-lab/taxonomy/blob/main/README.md#knowledge-yaml-examples) for different types of examples. ### Is the project looking for certain types of skill contributions? @@ -169,15 +168,15 @@ For code review, the project maintainers use LGTM (Looks Good to Me) in comments For skills and knowledge PRs, your PR will be checked to ensure it is relevant, actionable, and has all the information necessary for the approval team to review and merge the PR. The Triage team will use labels to manage the state and action of PRs as well as provide feedback to contributors based upon the following review guidelines: -* Does the PR have the pull request template information filled out? -* Did all the PR checks pass? -* Does the skill have three or more examples? -* Are the YAML fields correct? -* No PII in content -* Does this content include anything documented in the project's [Avoid these Topics](https://github.com/instruct-lab/community/blob/main/docs/README.md#avoid-these-topics) guidelines? -* Does it adhere to the [Code of Conduct](https://github.com/instruct-lab/taxonomy/blob/main/CONTRIBUTING.md#code-of-conduct) guidelines? -* Was a response clearly generated by the LLM? - +- Does the PR have the pull request template information filled out? +- Did all the PR checks pass? +- Does the skill have three or more examples? +- Are the YAML fields correct? +- No PII in content +- Does this content include anything documented in the project's [Avoid these Topics](https://github.com/instruct-lab/community/blob/main/docs/README.md#avoid-these-topics) guidelines? +- Does it adhere to the [Code of Conduct](https://github.com/instruct-lab/taxonomy/blob/main/CONTRIBUTING.md#code-of-conduct) guidelines? +- Was a response clearly generated by the LLM? + ### How long will it take for my pull request to be reviewed? 
Due to the large number of contributions currently being received, it is difficult to provide an exact timeline for reviewing your pull request. @@ -192,7 +191,7 @@ InstructLab and the Merlinite-7b project are distributed under [Apache License, ### What is the content license for InstructLab documentation? -Unless otherwise specified, all documentation for InstructLab is licensed under the [CC-BY-4.0 license from Creative Commons](https://creativecommons.org/licenses/by/4.0/). +Unless otherwise specified, all documentation for InstructLab is licensed under the [CC-BY-4.0 license from Creative Commons](https://creativecommons.org/licenses/by/4.0/). ### Am I required to license code submissions to InstructLab under the Apache 2.0 license? @@ -202,16 +201,16 @@ Yes. Code contributions to the InstructLab project are subject to the terms and It is recommended that third-party content be licensed with an open data license that does not restrict commercial use or the creation of derivative works, including the following licenses: -* CC0 -* CDLA-Permissive -* CC-BY-4.0 -* CC-BY-4.0 SA -* Apache 2.0 -* MIT +- CC0 +- CDLA-Permissive +- CC-BY-4.0 +- CC-BY-4.0 SA +- Apache 2.0 +- MIT ### Do submissions to the project require a contributor license agreement of some kind? -The InstructLab project follows the same approach (the [Developer's Certificate of Origin 1.1 (DCO)](https://developercertificate.org/)) that [the Linux Kernel community uses](https://docs.kernel.org/process/submitting-patches.html#sign-your-work-the-developer-s-certificate-of-origin) to manage code contributions. Unless the file says otherwise for this project, the relevant open source license is [the Apache License, Version 2.0](https://github.com/instruct-lab/taxonomy/blob/main/LICENSE). When submitting a patch for review, you must include a sign-off statement in the commit message. 
See the ["Legal" ](https://github.com/instruct-lab/taxonomy/blob/main/CONTRIBUTING.md#legal) section of the Contributing document. +The InstructLab project follows the same approach (the [Developer's Certificate of Origin 1.1 (DCO)](https://developercertificate.org/)) that [the Linux Kernel community uses](https://docs.kernel.org/process/submitting-patches.html#sign-your-work-the-developer-s-certificate-of-origin) to manage code contributions. Unless the file says otherwise for this project, the relevant open source license is [the Apache License, Version 2.0](https://github.com/instruct-lab/taxonomy/blob/main/LICENSE). When submitting a patch for review, you must include a sign-off statement in the commit message. See the ["Legal"](https://github.com/instruct-lab/taxonomy/blob/main/CONTRIBUTING.md#legal) section of the Contributing document. You can find more information about useful tools for managing DCO sign-off in our [Community Contributions Guide](https://github.com/instructlab/community/blob/main/CONTRIBUTING.md#developer-certificate-of-origin-dco). @@ -223,7 +222,7 @@ The latest version of InstructLab can be downloaded using the `ilab download` CL Currently, the best method for communicating with peers and project maintainers is in the Community Slack Channel. Visit our [InstructLab Slack Workspace Guide](https://github.com/instructlab/community/blob/main/InstructLabSlackGuide.md) for information on how to join. -TODO: Update with mailing list details once these are created. Related issue https://github.com/instructlab/community/issues/89 +TODO: Update with mailing list details once these are created. Related issue <https://github.com/instructlab/community/issues/89> ### What are the software and hardware requirements for using InstructLab? @@ -231,15 +230,16 @@ The local training is the most hardware intensive part of this process.
Your har To run and train InstructLab locally, you must meet the following requirements: -* A Linux-based operating system -* An Apple Silicon M1, M2, or M3 system -* Python 3.9 or later, including the development headers -* Approximately 10GB of free disk space to get through the `ilab generate` step -* Approximately 60GB of free disk space is needed to run the entire process locally on Apple hardware -* About 32 GB RAM +- A Linux-based operating system +- An Apple Silicon M1, M2, or M3 system +- Python 3.9 or later, including the development headers +- Approximately 10GB of free disk space to get through the `ilab generate` step +- Approximately 60GB of free disk space is needed to run the entire process locally on Apple hardware +- About 32 GB RAM + ## Additional Resources -Additional resources, including the Code of Conduct, Code of Conduct Committee members, how to contribute, how to join the Slack channel, and more, can be found in the following repositories: +Additional resources, including the Code of Conduct, Code of Conduct Committee members, how to contribute, how to join the Slack channel, and more, can be found in the following repositories: [InstructLab Taxonomy Repository](https://github.com/instructlab/taxonomy) @@ -249,6 +249,5 @@ Additional resources, including the Code of Conduct, Code of Conduct Committee m Slack and communication -* [Joining the Slack Channel](https://github.com/instructlab/community/blob/main/InstructLabSlackGuide.md) -* [Slack Moderation](https://github.com/instructlab/community/blob/main/InstructLabSlackModerationGuide.md) - +- [Joining the Slack Channel](https://github.com/instructlab/community/blob/main/InstructLabSlackGuide.md) +- [Slack Moderation](https://github.com/instructlab/community/blob/main/InstructLabSlackModerationGuide.md) diff --git a/InstructLabSlackGuide.md b/InstructLabSlackGuide.md index 72f17e2..b770a4d 100644 --- a/InstructLabSlackGuide.md +++ b/InstructLabSlackGuide.md @@ -1,30 +1,30 @@ # InstructLab 
Slack Workspace Guide -The purpose of this document is to inform folks about how to join the InstructLab Slack Workspace and document the channels therein. We look forward to +The purpose of this document is to inform folks about how to join the InstructLab Slack Workspace and document the channels therein. We look forward to meeting everyone and welcoming you on Slack! ## Overview -The InstructLab Slack workspace resides at https://instruct-lab.slack.com. You must join via this [invitation link](https://join.slack.com/t/instruct-lab/shared_invite/zt-2ginke0oz-SxvNYZCqBTDy5wzAi9WRlA) +The InstructLab Slack workspace resides at <https://instruct-lab.slack.com>. You must join via this [invitation link](https://join.slack.com/t/instruct-lab/shared_invite/zt-2ginke0oz-SxvNYZCqBTDy5wzAi9WRlA) -Upon joining, you will automatically be added to our #announce channel. You are welcome and encouraged to join other channels. +Upon joining, you will automatically be added to our `#announce` channel. You are welcome and encouraged to join other channels. All discussions in the InstructLab Slack are governed by our [project code of conduct](https://github.com/instructlab/community/blob/main/CODE_OF_CONDUCT.md). ## Channel Overview -- #dev Cross-project coordination discussion, such as topics that cover both frontend and backend development for InstructLab -- #admin Place to get non-technical help. If you don't know where to go after reading this guide, join this channel for [air traffic control](https://en.wikipedia.org/wiki/Air_traffic_control). -- #announce Project wide announcements such as releases, reminders about community calls, and celebrating new maintainers. This channel is moderated (only [Workspace Adminstrators](https://github.com/instructlab/community/blob/main/InstructLabSlackModerationGuide.md#workspace-administrators) can post) and low-traffic. -- #backend Backend work for the InstructLab project, including pipeline for synthetic data generation, training, model evaluation, and publishing.
-- #community Place to discuss community matters such as improving the contributor experience, getting help reviewing a presentation about InstructLab you want to give at a meetup, or learning how you can contribute to InstructLab beyond software development. -- #contribhelp General questions about getting started as an InstructLab contributor. This channel is the place to go if you need help with your first pull request. -- #docs Documentation team discussions and questions about documentation. -- #infra Topics related to project infrastructure, such as repo maintenance, planned outages, or who has the keys to the social media accounts. -- #frontend Frontend work for the InstructLab project, including the CLI tool and User Interface -- #social Place to chat and enjoy camaraderie with fellow community members. -- #triage Triage team discussions. -- #users InstructLab users forum for troubleshooting and sharing tips and tricks. +- `#dev` Cross-project coordination discussion, such as topics that cover both frontend and backend development for InstructLab +- `#admin` Place to get non-technical help. If you don't know where to go after reading this guide, join this channel for [air traffic control](https://en.wikipedia.org/wiki/Air_traffic_control). +- `#announce` Project wide announcements such as releases, reminders about community calls, and celebrating new maintainers. This channel is moderated (only [Workspace Adminstrators](https://github.com/instructlab/community/blob/main/InstructLabSlackModerationGuide.md#workspace-administrators) can post) and low-traffic. +- `#backend` Backend work for the InstructLab project, including pipeline for synthetic data generation, training, model evaluation, and publishing. 
+- `#community` Place to discuss community matters such as improving the contributor experience, getting help reviewing a presentation about InstructLab you want to give at a meetup, or learning how you can contribute to InstructLab beyond software development. +- `#contribhelp` General questions about getting started as an InstructLab contributor. This channel is the place to go if you need help with your first pull request. +- `#docs` Documentation team discussions and questions about documentation. +- `#infra` Topics related to project infrastructure, such as repo maintenance, planned outages, or who has the keys to the social media accounts. +- `#frontend` Frontend work for the InstructLab project, including the CLI tool and User Interface +- `#social` Place to chat and enjoy camaraderie with fellow community members. +- `#triage` Triage team discussions. +- `#users` InstructLab users forum for troubleshooting and sharing tips and tricks. ## Usings Threaded Replies in Slack @@ -40,7 +40,7 @@ To learn how to report abuse - and to whom you will be reporting - please see ou ## Having Trouble Joining? -If you are having trouble joining the InstructLab Slack, please file an issue in the [community repo](https://github.com/instructlab/community/issues) so we can help you. +If you are having trouble joining the InstructLab Slack, please file an issue in the [community repo](https://github.com/instructlab/community/issues) so we can help you. TODO: Update with email address to get help once these are set up. @@ -48,6 +48,5 @@ TODO: Update with email address to get help once these are set up. InstructLab is an open source project and we value defaulting to open in all of our community communications. There are some cases where discussions must happen in private. For the sake of transparency, we are documenting these private channels and what they are used for. 
-- #code-of-conduct-committee Space for the InstructLab [Code of Conduct Committee](https://github.com/instructlab/community/blob/main/COCC.md) to discuss any reports of harassement or other violations of the project Code of Conduct and how to respond to them. -- #mods Space for the InstructLab [Workspace Administrators](https://github.com/instructlab/community/blob/main/InstructLabSlackModerationGuide.md#workspace-administrators) to confer privately only when necessary. We default to open and hold each other accountable to do so. - +- `#code-of-conduct-committee` Space for the InstructLab [Code of Conduct Committee](https://github.com/instructlab/community/blob/main/COCC.md) to discuss any reports of harassement or other violations of the project Code of Conduct and how to respond to them. +- `#mods` Space for the InstructLab [Workspace Administrators](https://github.com/instructlab/community/blob/main/InstructLabSlackModerationGuide.md#workspace-administrators) to confer privately only when necessary. We default to open and hold each other accountable to do so. diff --git a/InstructLabSlackModerationGuide.md b/InstructLabSlackModerationGuide.md index 1e7f99e..0177802 100644 --- a/InstructLabSlackModerationGuide.md +++ b/InstructLabSlackModerationGuide.md @@ -3,19 +3,20 @@ The purpose of this document is both describe how users of the InstructLab's [Slack workspace](https://instruct-lab.slack.com) can report abuse in the Slack workspace and to provide space administrators with an easy to use how to guide for channel moderation. -# Reporting Abuse +## Reporting Abuse -Should any community members using the InstructLab Slack workspace feel that they have experienced behavior that violates our [project Code of Conduct](https://github.com/instruct-lab/community/blob/main/CODE_OF_CONDUCT.md), they are welcome and encouraged to contact the members of the [Code of Conduct Committee](https://github.com/instruct-lab/community/blob/main/COCC.md) for help. 
Mentioning @cocc will page all members of the committee so that they can assist you. +Should any community members using the InstructLab Slack workspace feel that they have experienced behavior that violates our [project Code of Conduct](https://github.com/instruct-lab/community/blob/main/CODE_OF_CONDUCT.md), they are welcome and encouraged to contact the members of the [Code of Conduct Committee](https://github.com/instruct-lab/community/blob/main/COCC.md) for help. Mentioning `@cocc` will page all members of the committee so that they can assist you. -In the event that you do not receive help within a timely fashion - and we will do our very best to respond right away - you can ask for help from the workspace admins by either joining channel #admin or mentioning @admins. +In the event that you do not receive help within a timely fashion - and we will do our very best to respond right away - you can ask for help from the workspace admins by either joining channel `#admin` or mentioning `@admins`. -# Moderation Guide +## Moderation Guide Moderation activities can only be performed by users who are designated as workspace administrators. ## Workspace Administrators At time of writing, our workspace adminstrators/moderators are as follows: + * Aakanksha Duggal * Ali Maredia * Alina Ryan @@ -40,14 +41,15 @@ At time of writing, our workspace adminstrators/moderators are as follows: ## How We Moderate -## Deleting Inappropriate Comments +## Deleting Inappropriate Comments + Upon report of abuse to the [Code of Conduct Committee](https://github.com/instruct-lab/community/blob/main/COCC.md) or, alternatively if needed to the workspace administrators due to a coverage gap, the appropriate parties will assess the situation. -The first step will be to remind folks to abide by the [project Code of Conduct](https://github.com/instruct-lab/community/blob/main/CODE_OF_CONDUCT.md). 
+The first step will be to remind folks to abide by the [project Code of Conduct](https://github.com/instruct-lab/community/blob/main/CODE_OF_CONDUCT.md). Inappropriate or offensive messages [will be deleted](https://slack.com/help/articles/202395258-Edit-or-delete-messages#delete-a-message). -Deleting a message shall be done at the sole discretion of the Code of Conduct Committee and/or workspace administrators. +Deleting a message shall be done at the sole discretion of the Code of Conduct Committee and/or workspace administrators. ### How to delete a message @@ -61,10 +63,10 @@ If a user is a repeat offender, after being warned, their account can be [deacti ### Removing someone from a channel -- By default, Workspace Owners and Admins can remove people from public channels, and members can remove people from private channels. -- Anyone can be removed from a channel by those with permission. -- All members and guests need to be added back to a private channel to rejoin it, and guests also need to be added back to a public channel to rejoin it. -- It's not possible to remove people from the #announce channel. However, posting in this channel is restricted to workspace administrators by default. +* By default, Workspace Owners and Admins can remove people from public channels, and members can remove people from private channels. +* Anyone can be removed from a channel by those with permission. +* All members and guests need to be added back to a private channel to rejoin it, and guests also need to be added back to a public channel to rejoin it. +* It's not possible to remove people from the #announce channel. However, posting in this channel is restricted to workspace administrators by default. 
![image](https://github.com/instructlab/community/assets/615883/2e1bac77-4674-4f9f-ab48-dcd2a5f590d0) diff --git a/MAINTAINERS.md b/MAINTAINERS.md index d22eaf8..4d3370c 100644 --- a/MAINTAINERS.md +++ b/MAINTAINERS.md @@ -25,7 +25,6 @@ Team which has full maintainer access to the CLI repository - [spzala](https://github.com/spzala) - [xukai92](https://github.com/xukai92) - ### CLI Triagers Team that can manage Issues and Pull Requests but cannot merge code to the CLI repository @@ -42,7 +41,6 @@ Team that can manage Issues and Pull Requests but cannot merge code to the CLI r - [mairin](https://github.com/mairin) - [oindrillac](https://github.com/oindrillac) - ## Community ### Community Maintainers @@ -61,7 +59,6 @@ Team which has full maintainer access to the Community repository - [mingxzhao](https://github.com/mingxzhao) - [mmcelaney](https://github.com/mmcelaney) - ## Enhancements ### Enhancements Triagers @@ -71,7 +68,6 @@ Team which has full maintainer access to the Enhancements repository - [nathan-weinberg](https://github.com/nathan-weinberg) - [russellb](https://github.com/russellb) - ## InstructLabBot ### InstructLab Bot Maintainers @@ -83,7 +79,6 @@ Team which has full maintainer access to the InstructLab Bot repository - [russellb](https://github.com/russellb) - [vishnoianil](https://github.com/vishnoianil) - ### InstructLab Bot Triagers Team that can manage Issues and Pull Requests but cannot merge code to the InstructLab Bot repository @@ -93,7 +88,6 @@ Team that can manage Issues and Pull Requests but cannot merge code to the Instr - [nathan-weinberg](https://github.com/nathan-weinberg) - [russellb](https://github.com/russellb) - ## Schema ### Schema Maintainers @@ -103,7 +97,6 @@ Team which has full maintainer access to the Schema repository - [bjhargrave](https://github.com/bjhargrave) - [jjasghar](https://github.com/jjasghar) - ## Taxonomy ### Taxonomy Approvers @@ -123,7 +116,6 @@ Team which has approval permissions to the Taxonomy repository - 
[shivchander](https://github.com/shivchander) - [xukai92](https://github.com/xukai92) - ### Taxonomy Maintainers Team which has full maintainer access to the Taxonomy repository @@ -147,7 +139,6 @@ Team which has full maintainer access to the Taxonomy repository - [xukai92](https://github.com/xukai92) - [yhwang](https://github.com/yhwang) - ### Taxonomy Triagers Team that can manage Issues and Pull Requests but cannot merge code to the Taxonomy repository @@ -165,7 +156,6 @@ Team that can manage Issues and Pull Requests but cannot merge code to the Taxon - [oindrillac](https://github.com/oindrillac) - [xukai92](https://github.com/xukai92) - ## Website ### Website Maintainers @@ -176,5 +166,3 @@ Team which has full maintainer access to the Website repository - [joesepi](https://github.com/joesepi) - [mairin](https://github.com/mairin) - [mscherer](https://github.com/mscherer) - - diff --git a/QUICK_START_GUIDE.md b/QUICK_START_GUIDE.md index 5eccc8f..077d25a 100644 --- a/QUICK_START_GUIDE.md +++ b/QUICK_START_GUIDE.md @@ -1,6 +1,7 @@ # Quick Start Guide ## Table of contents + 1. [Install ilab](#install-ilab) 2. [Initialize ilab](#%EF%B8%8F-initialize-ilab) 3. [Download the model](#-download-the-model) @@ -10,7 +11,6 @@ This Quick Start Guide will help you get InstructLab working on your laptop or machine and is expected to take approximately XX minutes. 
If you'd like more details on this process, see [the Taxonomy README](https://github.com/instructlab/taxonomy/blob/main/README.md) or if you'd like more information on the `cli`, please see [the ilab CLI README](https://github.com/instructlab/instructlab/blob/main/README.md) - ### Install `ilab` ## 📋 Requirements @@ -20,7 +20,6 @@ working on your laptop or machine and is expected to take approximately XX minut - Python 3.9+ - Approximately 60GB disk space (entire process) - ## ✅ Getting started ### 🧰 Installing `ilab` @@ -43,6 +42,7 @@ working on your laptop or machine and is expected to take approximately XX minut mkdir instructlab cd instructlab ``` + > **NOTE:** The following steps in this document use [Python venv](https://docs.python.org/3/library/venv.html) for virtual environments. However, if you use another tool such as [pyenv](https://github.com/pyenv/pyenv) or [Miniforge](https://github.com/conda-forge/miniforge) for managing Python environments on your machine continue to use that tool instead. Otherwise, you may have issues with packages that are installed but not found in `venv`. 3. Install and activate your `venv` environment by running the following command: @@ -52,6 +52,7 @@ working on your laptop or machine and is expected to take approximately XX minut source venv/bin/activate pip install https://github.com/instructlab/instructlab.git@stable ``` + > **NOTE**: ⏳ `pip install` may take some time, depending on your internet connection. 4. From your `venv` environment, verify `ilab` is installed correctly, by running the `ilab` command. @@ -60,8 +61,9 @@ working on your laptop or machine and is expected to take approximately XX minut ilab ``` - #### Example output: - ``` +#### Example output + + ```shell (venv) $ ilab Usage: ilab [OPTIONS] COMMAND [ARGS]... @@ -101,8 +103,7 @@ working on your laptop or machine and is expected to take approximately XX minut ilab init ``` - - #### Example output: + Example output: ```bash Welcome to InstructLab CLI. 
This guide will help you set up your environment. @@ -116,7 +117,7 @@ working on your laptop or machine and is expected to take approximately XX minut **Optional**: If you want to point to an existing local clone of the `taxonomy` repository, you can pass the path interactively or alternatively with the `--taxonomy-path` flag. - #### Example output: + Example output: ```bash (venv) $ ilab init @@ -128,11 +129,12 @@ working on your laptop or machine and is expected to take approximately XX minut Generating `config.yaml` in the current directory... Initialization completed successfully, you're ready to start using `lab`. Enjoy! ``` + `ilab` will use the default configuration file unless otherwise specified. You can override this behavior with the `--config` parameter for any `ilab` command. ### 📥 Download the model -* Run the `ilab download`command. +- Run the `ilab download` command. ```bash ilab download @@ -151,7 +153,7 @@ working on your laptop or machine and is expected to take approximately XX minut ### 🍴 Serving the model -* Serve the model by running the following command: +- Serve the model by running the following command: ```bash ilab serve @@ -182,7 +184,7 @@ Because you're serving the model in one terminal window, you will have to create Now that you have a working environment, you should see how we need to give it new knowledge. -Ask it a question (the default downloaded model (from `ilab download` and `ilab chat`) gets this wrong, see https://github.com/instructlab/taxonomy/pull/659): +Ask it a question (the default downloaded model (from `ilab download` and `ilab chat`) gets this wrong, see <https://github.com/instructlab/taxonomy/pull/659>): > When was the first British women's softball league established? @@ -267,6 +269,7 @@ The answer may be incorrect, so lets add knowledge that teaches the model the co ```bash ilab chat ``` + - Ask the original questions again: > When was the first British women's softball league established?
diff --git a/README.md b/README.md index 93bf841..aa6ce30 100644 --- a/README.md +++ b/README.md @@ -1,11 +1,11 @@ # Welcome to the InstructLab Community repository🔬 -The mission of the InstructLab (**L**arge-scale **A**lignment for chat**B**ots) project is to leverage innovative techniques that overcome challenges in Large Language Model (LLM) training. InstructLab uses a taxonomy based curation process, along with synthetic data generation, that allows the open source community to submit contributions to existing LLMs in an accessible way. +The mission of the InstructLab (**L**arge-scale **A**lignment for chat**B**ots) project is to leverage innovative techniques that overcome challenges in Large Language Model (LLM) training. InstructLab uses a taxonomy based curation process, along with synthetic data generation, that allows the open source community to submit contributions to existing LLMs in an accessible way. InstructLab is made up of several projects that are defined as codebases and services with different release cycles. Collectively, these enable large-model development. This repository shares InstructLab's activity and collaboration details across the community and include the most current information about the project. Related repositories include the following: -* [`ilab` command-line interface (CLI) tool](https://github.com/instructlab/instructlab). This repository is responsible for the `ilab` command-line interface (CLI) tool. -* [taxonomy tree](https://github.com/instructlab/taxonomy). This repository is responsible for the taxonomy tree that allows you to create models tuned with your data. +* [`ilab` command-line interface (CLI) tool](https://github.com/instructlab/instructlab). This repository is responsible for the `ilab` command-line interface (CLI) tool. +* [taxonomy tree](https://github.com/instructlab/taxonomy). This repository is responsible for the taxonomy tree that allows you to create models tuned with your data. 
Contributing new features, resolving bugs and issues, and refining the documentation experience through pull requests are welcome. More information about contributing to the InstructLab Project, contributor roles, governance and legal, and licenses can be found in proceeding sections of this document. @@ -13,9 +13,9 @@ Contributing new features, resolving bugs and issues, and refining the documenta The goals of this open source community includes the following: -- Drive adoption of the InstructLab tooling and model API standard. -- Grow and an ecosystem of contribution driven open models -- Establish deployable patterns, practices, and evidence for sophisticated use cases. +* Drive adoption of the InstructLab tooling and model API standard. +* Grow and an ecosystem of contribution driven open models +* Establish deployable patterns, practices, and evidence for sophisticated use cases. ## Getting Started with the InstructLab Project workstreams🥼 @@ -23,39 +23,39 @@ InstructLab (**L**arge-scale **A**lignment for chat**B**ots) is an open source i The following documentation shows you an overview of the workflow, and the resources needed, to get started with InstructLab. -## 💻 InstructLab (`ilab`) Workflow +## 💻 InstructLab (`ilab`) Workflow ### Installing and interacting with the `ilab` CLI tool -The `ilab` tool allows you to interact with the IBM AI model `Merlinite`, contribute your own information, and train the model locally. +The `ilab` tool allows you to interact with the IBM AI model `Merlinite`, contribute your own information, and train the model locally. > **Note:** Before proceeding, it might be beneficial to check out the [Contributing](https://github.com/instruct-lab/community/blob/main/CONTRIBUTING.md) guide for an overview of contributing practices and expectations. Additionally, you should consider joining the [InstructLab community Slack channel](https://github.com/instructlab/community/blob/main/InstructLabSlackGuide.md). -1. 
Navigate to the `ilab` CLI repository and follow the instructions in the [README.md](https://github.com/instructlab/instructlab/blob/main/README.md). The README.md instructs you on how to perform the following: +1. Navigate to the `ilab` CLI repository and follow the instructions in the [README.md](https://github.com/instructlab/instructlab/blob/main/README.md). The README.md instructs you on how to perform the following: a. In the [Getting started](https://github.com/instructlab/instructlab/blob/main/README.md#-getting-started) section of the README.md file, you can install the `ilab` tool, set up your local environment, and download the IBM `Merlinite` AI model. If you run into any issues, you can find many solutions in the [in the CLI repository's discussion board](https://github.com/instructlab/instructlab/discussions). - b. You can then create your own data sets to feed into and train the model. In the taxonomy project, there are two types of data you can serve to the model: skills and knowledge. There are a few different types of skills and knowledge you can create. For more detailed information on the types, see the Taxonomy [README.md](https://github.com/instructlab/taxonomy/blob/main/README.md#welcome-to-the-instructlab-taxonomy). + b. You can then create your own data sets to feed into and train the model. In the taxonomy project, there are two types of data you can serve to the model: skills and knowledge. There are a few different types of skills and knowledge you can create. For more detailed information on the types, see the Taxonomy [README.md](https://github.com/instructlab/taxonomy/blob/main/README.md#welcome-to-the-instructlab-taxonomy). - c. In your local taxonomy repository, generated after the [Initialize ilab](https://github.com/instructlab/instructlab/blob/main/README.md#%EF%B8%8F-initialize-ilab) step, navigate to the path that you want to add information to. 
You can see a flow chart of the paths in this file [taxonomy_diagram](https://github.com/instructlab/taxonomy/blob/main/docs/taxonomy_diagram.png). Create a `qna.yaml` file in that path with your contributions. + c. In your local taxonomy repository, generated after the [Initialize ilab](https://github.com/instructlab/instructlab/blob/main/README.md#%EF%B8%8F-initialize-ilab) step, navigate to the path that you want to add information to. You can see a flow chart of the paths in this file [taxonomy_diagram](https://github.com/instructlab/taxonomy/blob/main/docs/taxonomy_diagram.png). Create a `qna.yaml` file in that path with your contributions. - d. [Serve and train the model](https://github.com/instructlab/instructlab/blob/main/README.md#-train-the-model) with your contributions to see if the model can answer questions more accurately. + d. [Serve and train the model](https://github.com/instructlab/instructlab/blob/main/README.md#-train-the-model) with your contributions to see if the model can answer questions more accurately. - e. Congratulations! You trained an AI model locally! + e. Congratulations! You trained an AI model locally! -### Opening a pull request in the taxonomy repository with your new skills or knowledge! +### Opening a pull request in the taxonomy repository with your new skills or knowledge If your contributions improved the model locally, you can contribute your files to the main AI model through the taxonomy repository. For more information see [CONTRIBUTING.md](https://github.com/instructlab/taxonomy/blob/main/CONTRIBUTING.md#pull-request-review) in the taxonomy repository. -1. To contribute your knowledge and skills to the taxonomy repository, follow the documentation in [Contribute knowledge and skills to the taxonomy](https://github.com/instructlab/taxonomy/blob/main/README.md#contribute-knowledge-and-skills-to-the-taxonomy). +1. 
To contribute your knowledge and skills to the taxonomy repository, follow the documentation in [Contribute knowledge and skills to the taxonomy](https://github.com/instructlab/taxonomy/blob/main/README.md#contribute-knowledge-and-skills-to-the-taxonomy). > **IMPORTANT:** Ensure that your files and contributions follow the proper YAML format, see examples in the [Skills: YAML format](https://github.com/instructlab/taxonomy/blob/main/README.md#skills-yaml-examples) file. ### Getting reviews on pull requests -There are teams of contributors from Red Hat and IBM that will review your pull request and determine if it can be merged in the taxonomy repository. For more information, see the [Triaging contributions](https://github.com/instructlab/taxonomy/blob/main/docs/triaging/triaging-contributions.md) documentation. +There are teams of contributors from Red Hat and IBM that will review your pull request and determine if it can be merged in the taxonomy repository. For more information, see the [Triaging contributions](https://github.com/instructlab/taxonomy/blob/main/docs/triaging/triaging-contributions.md) documentation. -### See your contributions impact an AI model! +### See your contributions impact an AI model The IBM model `Merlinite` builds regularly. Sometime after your pull request is merged, Merlinite is updated and you can see locally that the model improved with the skill or knowledge you taught it. @@ -65,26 +65,26 @@ Help on open source projects is always welcome and there is always something tha To contribute code or documentation, please submit a pull request to the relevant repository. Note that contribution to any repository has its own set of requirements and expectations, and users should familiar themselves with those expectations before contributing. -- For more information about general contribution practices, see the [Contributing](https://github.com/instructlab/community/blob/main/CONTRIBUTING.md) guide. 
-- For more information about contributing to the taxonomy repository, see the [Taxonomy's contribution guide](https://github.com/instructlab/taxonomy/blob/main/CONTRIBUTING.md). -- For more information about contributing to the InstructLab CLI repository, see the [Instructlab contribution guide](https://github.com/instructlab/instructlab/blob/main/CONTRIBUTING/CONTRIBUTING.md). +* For more information about general contribution practices, see the [Contributing](https://github.com/instructlab/community/blob/main/CONTRIBUTING.md) guide. +* For more information about contributing to the taxonomy repository, see the [Taxonomy's contribution guide](https://github.com/instructlab/taxonomy/blob/main/CONTRIBUTING.md). +* For more information about contributing to the InstructLab CLI repository, see the [Instructlab contribution guide](https://github.com/instructlab/instructlab/blob/main/CONTRIBUTING/CONTRIBUTING.md). ### Contributor roles -The project welcomes new contributors. Not all contributors are able to provide sustained contributions, but they are always welcome. [The contributor roles](https://github.com/instructlab/community/blob/main/CONTRIBUTOR_ROLES.md) document outlines the various roles to support contributors and help them grow responsibility in the various InstructLab projects. These roles are subject to change, and new roles will be added as necessary. +The project welcomes new contributors. Not all contributors are able to provide sustained contributions, but they are always welcome. [The contributor roles](https://github.com/instructlab/community/blob/main/CONTRIBUTOR_ROLES.md) document outlines the various roles to support contributors and help them grow responsibility in the various InstructLab projects. These roles are subject to change, and new roles will be added as necessary. -#### Maintainers +#### Maintainers -Project Maintainers are first and foremost contributors that have shown they are committed to the long term success of a project. 
Maintainership is about building trust with the community and being a person that everyone can depend on to make consistent decisions in the best interest of the project. With enough time and experience, contributors can apply to become Maintainers. The current list of Maintainers can be found in the +Project Maintainers are first and foremost contributors that have shown they are committed to the long term success of a project. Maintainership is about building trust with the community and being a person that everyone can depend on to make consistent decisions in the best interest of the project. With enough time and experience, contributors can apply to become Maintainers. The current list of Maintainers can be found in the [Maintainers](https://github.com/instructlab/community/blob/main/MAINTAINERS.md) file. ## Governance & Legal -- [InstructLab Community Governance](governance.md) +* [InstructLab Community Governance](governance.md) -- [InstructLab Code of Conduct](CODE_OF_CONDUCT.md) +* [InstructLab Code of Conduct](CODE_OF_CONDUCT.md) -- You must agree to the terms of the [Developer Certificate of Origin (DCO)](https://developercertificate.org/) by signing off your commits in your pull requests. The Developer Certificate of Origin (DCO) is a lightweight way for contributors to certify that they wrote or otherwise have the right to submit the code they are contributing to the project. Here is the full [text of the DCO](https://developercertificate.org/), reformatted for readability: +* You must agree to the terms of the [Developer Certificate of Origin (DCO)](https://developercertificate.org/) by signing off your commits in your pull requests. The Developer Certificate of Origin (DCO) is a lightweight way for contributors to certify that they wrote or otherwise have the right to submit the code they are contributing to the project. 
Here is the full [text of the DCO](https://developercertificate.org/), reformatted for readability: > By making a contribution to this project, I certify that: > @@ -97,7 +97,7 @@ Project Maintainers are first and foremost contributors that have shown they are > d. I understand and agree that this project and the contribution are public and that a record of the contribution (including all personal information I submit with it, including my sign-off) is maintained indefinitely and may be redistributed consistent with this project or the open source license(s) involved. Contributors _sign-off_ that they adhere to these requirements by adding a `Signed-off-by` line to commit messages. For more information about how the DCO works with this project, see [Developer Certificate of Origin (DCO)](https://github.com/instructlab/community/blob/main/CONTRIBUTING.md#developer-certificate-of-origin-dco). - + ## Licenses Distributed under the [Apache License, Version 2.0](http://www.apache.org/licenses/LICENSE-2.0). @@ -108,15 +108,15 @@ If you would like to see the detailed LICENSE click, see [LICENSE](LICENSE). ## Contact resources -- [InstructLab Slack](https://instruct-lab.slack.com). See the InstuctLab Slack Guide for directions on how to join. -- [InstructLab Slack Guide](InstructLabSlackGuide.md) -- [InstructLab Slack Moderation Guide](InstructLabSlackModerationGuide.md) -- [InstructLab Mailing lists](https://github.com/instructlab/community/blob/main/Collaboration.md#aliases-and-mailing-lists-catalog) -- [Discussion](https://github.com/orgs/instructlab/discussions). +* [InstructLab Slack](https://instruct-lab.slack.com). See the InstuctLab Slack Guide for directions on how to join. 
+* [InstructLab Slack Guide](InstructLabSlackGuide.md) +* [InstructLab Slack Moderation Guide](InstructLabSlackModerationGuide.md) +* [InstructLab Mailing lists](https://github.com/instructlab/community/blob/main/Collaboration.md#aliases-and-mailing-lists-catalog) +* [Discussion](https://github.com/orgs/instructlab/discussions). ## Quick Links -# [FAQ](FAQ.md) +* [FAQ](FAQ.md) * [LICENSE](LICENSE) * [README](README.md) * [CONTRIBUTING](CONTRIBUTING.md) diff --git a/docs/DataSources.md b/docs/DataSources.md index 04bddb6..bc7110c 100644 --- a/docs/DataSources.md +++ b/docs/DataSources.md @@ -1,18 +1,20 @@ -# [Data Sources](#Data-Sources) +# [Data Sources](#data-sources) + The purpose of this document is to provide attribution required for data used in the InstructLab project including -data for pre-training, knowledge data, and skills data. +data for pre-training, knowledge data, and skills data. -For new contributions to InstructLab that require data along with -the submission, we have [thorough guidelines](https://github.com/instructlab/taxonomy/blob/main/CONTRIBUTING.md#for-your-attributiontxt-file) on how to provide -attribution for these data sources. +For new contributions to InstructLab that require data along with +the submission, we have [thorough guidelines](https://github.com/instructlab/taxonomy/blob/main/CONTRIBUTING.md#for-your-attributiontxt-file) on how to provide +attribution for these data sources. For data used to create the artifacts of the InsructLab project prior to its being open sourced, -we are [aware that this list is not comprehensive](https://github.com/instructlab/taxonomy/issues/255) at the time of creating it. +we are [aware that this list is not comprehensive](https://github.com/instructlab/taxonomy/issues/255) at the time of creating it. The project maintainers will augment and update it over time to the best of our abilities. 
-## [Textbooks: Knowledge](#Texbooks-Knowledge) +## [Textbooks: Knowledge](#textbooks-knowledge) + The following openly licensed textbook works were used as knowledge source seeds in the knowledge synthetic data -generation pipeline. +generation pipeline. | Dataset Name | Knowledge Taxonomy Location | License (where possible, use [SPDX License Identifier](https://spdx.org/licenses/)) | Creator Names | Copyright | | ------------ | --------------------------- | ------- | ------------- | --------- | @@ -37,7 +39,7 @@ generation pipeline. | [Introductory Statistics](https://openstax.org/details/books/introductory-statistics) | high_school_mathmatics | CC-BY-4.0 |**Senior Contributing Authors**: Barbara Illowsky, De Anza College; Susan Dean, De Anza College | Copright 2018 Rice University | | [Statistics](https://openstax.org/details/books/statistics) | high_school_mathematics | CC-BY-4.0 |**Senior Contributing Authors**: Barbara Illowsky, De Anza College; Susan Dean, De Anza College | Copright 2020 Texas Education Agency (TEA) | | [College Algebra 2e](https://openstax.org/details/books/college-algebra-2e) | high_school_mathematics | CC-BY-4.0 |**Senior Contributing Author**: Jay Abramson, Arizona State University | Copright 2021 Rice University | -| [Applied Calculus](https://mathbooks.unl.edu/BCalculus/colophon-1.html) | high_school_mathmatics | CC-BY-SA-4.0 | Kevin Gonzales, Eric Hopkins, Catherine Zimmitti, Cheryl Kane; Modified to fit Applied Calculus from Coordinated Calculus by Nathan Wakefield et. al.; Based upon Active Calculus by Matthew Boelkins | Copyright 2018 - 2021 University of Nebraska - Lincoln, Department of Mathematics| +| [Applied Calculus](https://mathbooks.unl.edu/BCalculus/colophon-1.html) | high_school_mathmatics | CC-BY-SA-4.0 | Kevin Gonzales, Eric Hopkins, Catherine Zimmitti, Cheryl Kane; Modified to fit Applied Calculus from Coordinated Calculus by Nathan Wakefield et. 
al.; Based upon Active Calculus by Matthew Boelkins | Copyright 2018 - 2021 University of Nebraska - Lincoln, Department of Mathematics| | [Coordinated Calculus](https://mathbooks.unl.edu/Calculus) | high_school_mathematics | CC-BY-SA-4.0 | Nathan Wakefield, Christine Kelley, Marla Williams, Michelle Haver, Lawrence Seminario-Romero, Robert Huben, Aurora Marks, Stephanie Prahl; Based upon Active Calculus by Matthew Boelkins | Copyright 2019 University of Nebraska - Lincoln, Department of Mathematics | | [Coordinated Multivariable Calculus](https://mathbooks.unl.edu/MultiVarCalc/colophon-1.html)| high_school_mathematics | CC-BY-NC-SA-4.0 | Steve Schlicker, Mitchel T. Keller, Nicholas Long, Zach Norwood, Audrey Goodnight; Based on Active Calculus | Copyright 2013 - 2022 Steven Schlicker, Mitchel T. Keller, and Nicholas Long | | [Principles of Economics 3e](https://openstax.org/details/books/principles-economics-3e) | high_school_microeconomics | CC-BY-4.0 | **Senior Contributing Authors**: Steven A. Greenlaw, University of Mary Washington; David Shapiro, Pennsylvania State University; Daniel MacDonald, California State University, San Bernardino | Copyright 2022 Rice University | @@ -48,20 +50,22 @@ generation pipeline. | [World History, Volume 1: to 1500](https://openstax.org/details/books/world-history-volume-1) | high_school_world_history | CC-BY-4.0 | **Senior Contributing Authors**: Ann Kordas, Johnson & Wales University; Ryan J. Lynch, Columbus State University; Brooke Nelson, formerly California State University; Julie Tatlock, Mount Mary University | Copyright 2023 Rice University | | [World History, Volume 2: from 1400](https://openstax.org/details/books/world-history-volume-2) | high_school_world_history | CC-BY-4.0 | **Senior Contributing Authors**: Ann Kordas, Johnson & Wales University; Ryan J. 
Lynch, Columbus State University; Brooke Nelson, formerly California State University; Julie Tatlock, Mount Mary University | Copyright 2022 Rice University | | [Introduction to Philosophy](https://openstax.org/details/books/introduction-philosophy) | philosophy | CC-BY-4.0 | **Senior Contributing Author**: Nathan Smith, Houston Community College | Copyright 2022 Rice University | -| [Principles of Financial Accounting](https://open.umn.edu/opentextbooks/textbooks/principles-of-financial-accounting) | financial_accounting | CC-BY-SA-4.0 | Christine Jonick | Copyright 2017 University of North Georgia Press | +| [Principles of Financial Accounting](https://open.umn.edu/opentextbooks/textbooks/principles-of-financial-accounting) | financial_accounting | CC-BY-SA-4.0 | Christine Jonick | Copyright 2017 University of North Georgia Press | | [Intermediate Financial Accounting Volume 1](https://open.umn.edu/opentextbooks/textbooks/intermediate-financial-accounting-volume-1) | financial_accounting | CC-BY-4.0 | Glenn Arnold, Athabasca University and Suzanne Kyle | Copyright 2016 Vretta-Lyryx Inc. | | [Intermediate Financial Accounting Volume 2](https://open.umn.edu/opentextbooks/textbooks/intermediate-financial-accounting-volume-2)) | financial_accounting | CC-BY-4.0 | Glenn Arnold, Athabasca University and Suzanne Kyle | Copyright 2017-2021 Vretta-Lyryx Inc. | | [Introduction to Political Science](https://openstax.org/details/books/introduction-political-science) | political_science | CC-BY-2.0 | **Senior Contributing Authors**: Mark Carl Rom, Georgetown University; Masaki Hidaka, American University; Rachel Bzostek Walker, Collin College | Copyright 2022 Rice University| | [Introduction to Anthropology](https://openstax.org/details/books/introduction-anthropology) | anthropology | CC-BY-4.0 | **Senior Contributing Authors**: Jennifer Hasty, University of Pennsylvania; David G. Lewis, Oregon State University; Marjorie M. 
Snipes, University of West Georgia | Copright 2022 Rice University | -## [Data Sets](#Data-Sets) +## [Data Sets](#data-sets) + | Dataset Name | Knowledge Taxonomy Location | License and/or Copyright | Other Citiation Information | | ------------ | --------------------------- | --------------------- | --------- | -|[IBM Redbooks](https://www.redbooks.ibm.com/) | ibm_redbooks | Copyright IBM [with some rights available](https://www.redbooks.ibm.com/copyright) | +|[IBM Redbooks](https://www.redbooks.ibm.com/) | ibm_redbooks | Copyright IBM [with some rights available](https://www.redbooks.ibm.com/copyright) | | The following openly licensed datasets were used as foundational and safety seeds in the skills synthetic data generation pipeline -## [Foundational Skills](#Foundational-Skills) +## [Foundational Skills](#foundational-skills) + The following openly licensed datasets were used as foundational and safety seeds in the skills synthetic data generation pipeline | Dataset Name | License | @@ -88,10 +92,9 @@ The following openly licensed datasets were used as foundational and safety seed | [Flan Collection](https://github.com/google-research/FLAN) | Apache-2.0 | | [Chatbot arena (Prompts Only)](https://huggingface.co/datasets/lmsys/chatbot_arena_conversations) | CC-BY-4.0 | -## [Saftey Skills](#Saftey-Skills) +## [Safety Skills](#safety-skills) | Dataset Name | License | | --------------|----------| | [OASST2](https://huggingface.co/datasets/OpenAssistant/oasst2) | Apache-2.0 | | [Prosocial-dialog](https://huggingface.co/datasets/allenai/prosocial-dialog) | CC-BY-4.0 | - diff --git a/docs/README.md b/docs/README.md index 6f0dc69..c1560ce 100644 --- a/docs/README.md +++ b/docs/README.md @@ -9,24 +9,28 @@ We accept contributions of both Skills and Knowledge to InstructLab.
## Learning Topics Skills + - [Getting started with Skill contributions](https://github.com/instructlab/taxonomy/blob/main/README.md#getting-started-with-skill-contributions) - [Skills guide](https://github.com/instructlab/taxonomy/blob/main/docs/SKILLS_GUIDE.md) Knowledge + - [Getting started with Knowledge contributions](https://github.com/instructlab/taxonomy/blob/main/README.md#getting-started-with-knowledge-contributions) - [Knowledge guide](https://github.com/instructlab/taxonomy/blob/main/docs/KNOWLEDGE_GUIDE.md) + ## License Limitations If you would like to contribute any third-party data to either the Skills or Knowledge taxonomies, you must ensure the license on the data is unrestricted for commercial use. This applies to: -* Data embedded in `.md` files as knowledge -* Data offered as `context` in `qna.yaml` files for skills -* Citing your sources in your `attribution.txt` file -* Questions and answers sourced from elsewhere and used as `qna.yaml` submissions +- Data embedded in `.md` files as knowledge +- Data offered as `context` in `qna.yaml` files for skills +- Citing your sources in your `attribution.txt` file +- Questions and answers sourced from elsewhere and used as `qna.yaml` submissions For this project, unless the file says otherwise, or unless the attributed source provided in the file says otherwise, the relevant open source license is the Apache License, Version 2.0. All contributions that leverage third party content should either come from the public domain (e.g. 
out of copyright, or .gov sites) or be licensed with an open data license that does not restrict commercial use or the creation of derivative works, including the following license types: + - CC0 - CDLA-Permissive-2.0 - CC-BY-4.0 @@ -36,6 +40,7 @@ For this project, unless the file says otherwise, or unless the attributed sourc Any third party content contributed to this project undergoes modifications in order to formulate it in the templated format required for submission to this project. ## Works Cited on this Page -* [Christianity in Nepal](https://en.wikipedia.org/wiki/Christianity_in_Nepal), Wikipedia, Wikimedia Foundation, 24 April 2024. -* [Concepts of Biology - 1st Canadian Edition](https://opentextbc.ca/biology/), Chapter 11.3 _Circulatory and Respiratory Systems_. Copyright 2015 by Charles Molnar and Jane Gair, licensed under a [Creative Commons Attribution 4.0 License](https://creativecommons.org/licenses/by/4.0/). No modifications were made to the text. -* [World History, volume 2: from 1400](https://openstax.org/details/books/world-history-volume-2), Chapter 6.3 _Capitalism and the First Industrial Revolution_. Copyright 2022 Rice University, licensed under a [Creative Commons Attribution 4.0 License](https://creativecommons.org/licenses/by/4.0/). No modifications were made to the text. + +- [Christianity in Nepal](https://en.wikipedia.org/wiki/Christianity_in_Nepal), Wikipedia, Wikimedia Foundation, 24 April 2024. +- [Concepts of Biology - 1st Canadian Edition](https://opentextbc.ca/biology/), Chapter 11.3 _Circulatory and Respiratory Systems_. Copyright 2015 by Charles Molnar and Jane Gair, licensed under a [Creative Commons Attribution 4.0 License](https://creativecommons.org/licenses/by/4.0/). No modifications were made to the text. +- [World History, volume 2: from 1400](https://openstax.org/details/books/world-history-volume-2), Chapter 6.3 _Capitalism and the First Industrial Revolution_. 
Copyright 2022 Rice University, licensed under a [Creative Commons Attribution 4.0 License](https://creativecommons.org/licenses/by/4.0/). No modifications were made to the text. diff --git a/governance.md b/governance.md index c5cfbba..6f5f289 100644 --- a/governance.md +++ b/governance.md @@ -6,18 +6,18 @@ The following document outlines how the InstructLab project governance operates. InstructLab is made up of several projects that are defined as codebases and services with different release cycles. Collectively, these enable large-model development. Currently, these projects include the following: -* [`ilab` command-line interface (CLI) tool](https://github.com/instructlab/instructlab). This repository is responsible for the `ilab` command-line interface (CLI) tool. -* [taxonomy tree](https://github.com/instructlab/taxonomy). This repository is responsible for the taxonomy tree that allows you to create models tuned with your data. +* [`ilab` command-line interface (CLI) tool](https://github.com/instructlab/instructlab). This repository is responsible for the `ilab` command-line interface (CLI) tool. +* [taxonomy tree](https://github.com/instructlab/taxonomy). This repository is responsible for the taxonomy tree that allows you to create models tuned with your data. ## Governance Structure and Roadmap -The InstructLab Project will evolve into a two-level governance structure with an Oversight Committee and [Project Maintainers](https://github.com/instructlab/community/blob/main/MAINTAINERS.md). +The InstructLab Project will evolve into a two-level governance structure with an Oversight Committee and [Project Maintainers](https://github.com/instructlab/community/blob/main/MAINTAINERS.md). At launch, the InstructLab Project will not have an Oversight Committee to avoid unnecessary overhead. 
After the majority of project Maintainers agree that the project has grown to the point where an Oversight Committee is necessary, project Maintainers will begin establishment. Until the Oversight Committee is constituted, duties of the Oversight Committee will be assumed by project Maintainers. Except where otherwise noted, decisions should always start at the most local level of project governance. For example, decisions that affect only one project, such as the taxonomy repository and not the `ilab` CLI tool, can happen within that project. While communication between the different project teams is important as they are all interconnected, minor decisions do not need organization-wide consensus and can be moved forward at the project level. -Changes in maintainership and other governance are currently announced on the InstructLab community Slack channel. Directions to join the Slack channel can be found [here](https://github.com/instructlab/community/blob/main/InstructLabSlackGuide.md). In the future, a mailing list will be established. +Changes in maintainership and other governance are currently announced on the InstructLab community Slack channel. Directions to join the Slack channel can be found [here](https://github.com/instructlab/community/blob/main/InstructLabSlackGuide.md). In the future, a mailing list will be established. ## Project Maintainers overview @@ -56,7 +56,7 @@ Until the Oversight Committee is selected, these duties will be carried out by t Cross-component technical policies are out of scope for this project governance document, but can be found in the [InstructLab Enhancements Repo](https://github.com/instructlab/enhancements/blob/main/README.md). 
-### Oversight Committee selection process +### Oversight Committee selection process The Oversight Committee will be selected and maintained using the following process: @@ -76,15 +76,15 @@ The election will proceed according to the following process: ### Resignation or Departure from the Maintainer or the Oversight Committee role -Project Maintainers or Oversight Committee members may resign or could be expelled as follows: +Project Maintainers or Oversight Committee members may resign or could be expelled as follows: -* Maintainers or an Oversight Committee member may step down through email. Within 7 calendar days, organization contributors and Maintainers will be notified on the InstructLab community Slack channel. Directions to join the Slack channel can be found [here](https://github.com/instructlab/community/blob/main/InstructLabSlackGuide.md). In the future, a mailing list will be established. +* Maintainers or an Oversight Committee member may step down through email. Within 7 calendar days, organization contributors and Maintainers will be notified on the InstructLab community Slack channel. Directions to join the Slack channel can be found [here](https://github.com/instructlab/community/blob/main/InstructLabSlackGuide.md). In the future, a mailing list will be established. * After an Oversight Committee member steps down, they become an emeritus Maintainer. -* Maintainers and Committee members MUST remain active on the project. In the event that an Oversight Committee member or a Maintainer is unresponsive or inactive for more than 3 months, they may be removed by a supermajority vote. +* Maintainers and Committee members MUST remain active on the project. In the event that an Oversight Committee member or a Maintainer is unresponsive or inactive for more than 3 months, they may be removed by a supermajority vote. 
-* Maintainers and Oversight Committee members who have violated the [Code of Conduct](https://github.com/instructlab/community/blob/main/CODE_OF_CONDUCT.md) may be removed by a supermajority vote of the remaining Oversight Committee members. +* Maintainers and Oversight Committee members who have violated the [Code of Conduct](https://github.com/instructlab/community/blob/main/CODE_OF_CONDUCT.md) may be removed by a supermajority vote of the remaining Oversight Committee members. ## Decision making at the InstructLab organization level diff --git a/tools/maintainers/README.md b/tools/maintainers/README.md index 42b4036..96ba184 100644 --- a/tools/maintainers/README.md +++ b/tools/maintainers/README.md @@ -19,6 +19,6 @@ Changes to team membership should happen in this way: From the root of the repo: -``` +```shell tools/maintainers/maintainers.py tools/maintainers/teams.yaml > MAINTAINERS.md ```